From bd9ac40babf894ed9c76112aa07161d79f070b72 Mon Sep 17 00:00:00 2001 From: "Dunajski, Bartosz" Date: Thu, 2 Aug 2018 13:03:12 +0200 Subject: [PATCH] Set non-aux mode for non-pure stateful kernel arguments Change-Id: Id59fc3ae966a1f6632a24cbcfca3fb0aef0dd76d --- runtime/gtpin/gtpin_callbacks.cpp | 2 +- runtime/kernel/kernel.cpp | 5 ++++- runtime/mem_obj/buffer.cpp | 2 +- runtime/mem_obj/buffer.h | 4 ++-- runtime/mem_obj/buffer.inl | 4 ++-- unit_tests/helpers/base_object_tests.cpp | 4 ++-- unit_tests/mem_obj/buffer_set_arg_tests.cpp | 22 +++++++++++++++++++ unit_tests/mem_obj/buffer_tests.cpp | 8 +++---- unit_tests/mocks/mock_buffer.h | 6 ++--- .../linux/drm_command_stream_tests.cpp | 2 +- 10 files changed, 42 insertions(+), 17 deletions(-) diff --git a/runtime/gtpin/gtpin_callbacks.cpp b/runtime/gtpin/gtpin_callbacks.cpp index 9403b7f79e..7a653425bb 100644 --- a/runtime/gtpin/gtpin_callbacks.cpp +++ b/runtime/gtpin/gtpin_callbacks.cpp @@ -148,7 +148,7 @@ void gtpinNotifyKernelSubmit(cl_kernel kernel, void *pCmdQueue) { void *pSurfaceState = gtpinHelper.getSurfaceState(pKernel, gtpinBTI); cl_mem buffer = (cl_mem)resource; auto pBuffer = castToObjectOrAbort<Buffer>(buffer); - pBuffer->setArgStateful(pSurfaceState); + pBuffer->setArgStateful(pSurfaceState, false); } } diff --git a/runtime/kernel/kernel.cpp b/runtime/kernel/kernel.cpp index 59a54dcfc4..87d81d5eed 100644 --- a/runtime/kernel/kernel.cpp +++ b/runtime/kernel/kernel.cpp @@ -1109,9 +1109,12 @@ cl_int Kernel::setArgBuffer(uint32_t argIndex, this->patchInfoDataList.push_back(patchInfoData); } + bool forceNonAuxMode = buffer->getGraphicsAllocation()->getAllocationType() == GraphicsAllocation::AllocationType::BUFFER_COMPRESSED && + !kernelArgInfo.pureStatefulBufferAccess; + if (requiresSshForBuffers()) { auto surfaceState = ptrOffset(getSurfaceStateHeap(), kernelArgInfo.offsetHeap); - buffer->setArgStateful(surfaceState); + buffer->setArgStateful(surfaceState, forceNonAuxMode); } return CL_SUCCESS; diff --git 
a/runtime/mem_obj/buffer.cpp b/runtime/mem_obj/buffer.cpp index b58b73f31b..d0f33324e2 100644 --- a/runtime/mem_obj/buffer.cpp +++ b/runtime/mem_obj/buffer.cpp @@ -451,7 +451,7 @@ void Buffer::setSurfaceState(const Device *device, GraphicsAllocation *gfxAlloc, cl_mem_flags flags) { auto buffer = Buffer::createBufferHwFromDevice(device, flags, svmSize, svmPtr, svmPtr, gfxAlloc, false, false, false); - buffer->setArgStateful(surfaceState); + buffer->setArgStateful(surfaceState, false); buffer->graphicsAllocation = nullptr; delete buffer; } diff --git a/runtime/mem_obj/buffer.h b/runtime/mem_obj/buffer.h index 55048c8bf0..8e1a2d7e0f 100644 --- a/runtime/mem_obj/buffer.h +++ b/runtime/mem_obj/buffer.h @@ -102,7 +102,7 @@ class Buffer : public MemObj { bool isValidSubBufferOffset(size_t offset); uint64_t setArgStateless(void *memory, uint32_t patchSize) { return setArgStateless(memory, patchSize, false); } uint64_t setArgStateless(void *memory, uint32_t patchSize, bool set32BitAddressing); - virtual void setArgStateful(void *memory) = 0; + virtual void setArgStateful(void *memory, bool forceNonAuxMode) = 0; bool bufferRectPitchSet(const size_t *bufferOrigin, const size_t *region, size_t &bufferRowPitch, @@ -158,7 +158,7 @@ class BufferHw : public Buffer { : Buffer(context, flags, size, memoryStorage, hostPtr, gfxAllocation, zeroCopy, isHostPtrSVM, isObjectRedescribed) {} - void setArgStateful(void *memory) override; + void setArgStateful(void *memory, bool forceNonAuxMode) override; static Buffer *create(Context *context, cl_mem_flags flags, diff --git a/runtime/mem_obj/buffer.inl b/runtime/mem_obj/buffer.inl index 01f2e62e70..6d31a4c78f 100644 --- a/runtime/mem_obj/buffer.inl +++ b/runtime/mem_obj/buffer.inl @@ -41,7 +41,7 @@ union SURFACE_STATE_BUFFER_LENGTH { }; template <typename GfxFamily> -void BufferHw<GfxFamily>::setArgStateful(void *memory) { +void BufferHw<GfxFamily>::setArgStateful(void *memory, bool forceNonAuxMode) { using RENDER_SURFACE_STATE = typename GfxFamily::RENDER_SURFACE_STATE; using 
SURFACE_FORMAT = typename RENDER_SURFACE_STATE::SURFACE_FORMAT; using AUXILIARY_SURFACE_MODE = typename RENDER_SURFACE_STATE::AUXILIARY_SURFACE_MODE; @@ -86,7 +86,7 @@ void BufferHw<GfxFamily>::setArgStateful(void *memory) { Gmm *gmm = graphicsAllocation ? graphicsAllocation->gmm : nullptr; - if (gmm && gmm->isRenderCompressed) { + if (gmm && gmm->isRenderCompressed && !forceNonAuxMode) { // Its expected to not program pitch/qpitch/baseAddress for Aux surface in CCS scenarios surfaceState->setCoherencyType(RENDER_SURFACE_STATE::COHERENCY_TYPE_GPU_COHERENT); surfaceState->setAuxiliarySurfaceMode(AUXILIARY_SURFACE_MODE::AUXILIARY_SURFACE_MODE_AUX_CCS_E); diff --git a/unit_tests/helpers/base_object_tests.cpp b/unit_tests/helpers/base_object_tests.cpp index 32e19b1135..93cea2ffdd 100644 --- a/unit_tests/helpers/base_object_tests.cpp +++ b/unit_tests/helpers/base_object_tests.cpp @@ -94,7 +94,7 @@ class MockObject : public Buffer { return this->isValid(); } - void setArgStateful(void *memory) override { + void setArgStateful(void *memory, bool forceNonAuxMode) override { } cl_ulong validMagic; @@ -291,7 +291,7 @@ class MockBuffer : public MockBufferStorage, public Buffer { MockBuffer() : MockBufferStorage(), Buffer(nullptr, CL_MEM_USE_HOST_PTR, sizeof(data), &data, &data, &mockGfxAllocation, true, false, false) { } - void setArgStateful(void *memory) override { + void setArgStateful(void *memory, bool forceNonAuxMode) override { } void setFakeOwnership() { diff --git a/unit_tests/mem_obj/buffer_set_arg_tests.cpp b/unit_tests/mem_obj/buffer_set_arg_tests.cpp index 1c33beb323..09bae7d6fc 100644 --- a/unit_tests/mem_obj/buffer_set_arg_tests.cpp +++ b/unit_tests/mem_obj/buffer_set_arg_tests.cpp @@ -20,6 +20,7 @@ * OTHER DEALINGS IN THE SOFTWARE. 
*/ +#include "runtime/gmm_helper/gmm.h" #include "runtime/helpers/ptr_math.h" #include "runtime/kernel/kernel.h" #include "runtime/memory_manager/surface.h" @@ -168,6 +169,27 @@ HWTEST_F(BufferSetArgTest, givenSetArgBufferWithNullArgStatelessThenDontProgramN EXPECT_EQ(memcmp(sshOriginal, surfaceStateHeap, sizeof(surfaceStateHeap)), 0); } +HWTEST_F(BufferSetArgTest, givenNonPureStatefulArgWhenRenderCompressedBufferIsSetThenSetNonAuxMode) { + using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE; + + auto surfaceState = reinterpret_cast<RENDER_SURFACE_STATE *>(ptrOffset(pKernel->getSurfaceStateHeap(), pKernelInfo->kernelArgInfo[0].offsetHeap)); + buffer->getGraphicsAllocation()->setAllocationType(GraphicsAllocation::AllocationType::BUFFER_COMPRESSED); + buffer->getGraphicsAllocation()->gmm = new Gmm(buffer->getGraphicsAllocation()->getUnderlyingBuffer(), buffer->getSize(), false); + buffer->getGraphicsAllocation()->gmm->isRenderCompressed = true; + pKernelInfo->requiresSshForBuffers = true; + cl_mem clMem = buffer; + + pKernelInfo->kernelArgInfo.at(0).pureStatefulBufferAccess = false; + cl_int ret = pKernel->setArgBuffer(0, sizeof(cl_mem), &clMem); + EXPECT_EQ(CL_SUCCESS, ret); + EXPECT_TRUE(RENDER_SURFACE_STATE::AUXILIARY_SURFACE_MODE::AUXILIARY_SURFACE_MODE_AUX_NONE == surfaceState->getAuxiliarySurfaceMode()); + + pKernelInfo->kernelArgInfo.at(0).pureStatefulBufferAccess = true; + ret = pKernel->setArgBuffer(0, sizeof(cl_mem), &clMem); + EXPECT_EQ(CL_SUCCESS, ret); + EXPECT_TRUE(RENDER_SURFACE_STATE::AUXILIARY_SURFACE_MODE::AUXILIARY_SURFACE_MODE_AUX_CCS_E == surfaceState->getAuxiliarySurfaceMode()); +} + TEST_F(BufferSetArgTest, setKernelArgBufferFor32BitAddressing) { auto pKernelArg = (void **)(pKernel->getCrossThreadData() + pKernelInfo->kernelArgInfo[0].kernelArgPatchInfoVector[0].crossthreadOffset); diff --git a/unit_tests/mem_obj/buffer_tests.cpp b/unit_tests/mem_obj/buffer_tests.cpp index 42ba640229..f0f00a6c11 100644 --- a/unit_tests/mem_obj/buffer_tests.cpp +++ 
b/unit_tests/mem_obj/buffer_tests.cpp @@ -1162,7 +1162,7 @@ HWTEST_F(BufferSetSurfaceTests, givenBufferSetSurfaceThatAddressIsForcedTo32bitW using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE; RENDER_SURFACE_STATE surfaceState = {}; - buffer->setArgStateful(&surfaceState); + buffer->setArgStateful(&surfaceState, false); auto surfBaseAddress = surfaceState.getSurfaceBaseAddress(); auto bufferAddress = buffer->getGraphicsAllocation()->getGpuAddress(); @@ -1197,7 +1197,7 @@ HWTEST_F(BufferSetSurfaceTests, givenBufferWithOffsetWhenSetArgStatefulIsCalledT using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE; RENDER_SURFACE_STATE surfaceState = {}; - subBuffer->setArgStateful(&surfaceState); + subBuffer->setArgStateful(&surfaceState, false); auto surfBaseAddress = surfaceState.getSurfaceBaseAddress(); auto bufferAddress = buffer->getGraphicsAllocation()->getGpuAddress(); @@ -1223,7 +1223,7 @@ HWTEST_F(BufferSetSurfaceTests, givenRenderCompressedGmmResourceWhenSurfaceState buffer->getGraphicsAllocation()->gmm = gmm; gmm->isRenderCompressed = true; - buffer->setArgStateful(&surfaceState); + buffer->setArgStateful(&surfaceState, false); EXPECT_EQ(0u, surfaceState.getAuxiliarySurfaceBaseAddress()); EXPECT_TRUE(AUXILIARY_SURFACE_MODE::AUXILIARY_SURFACE_MODE_AUX_CCS_E == surfaceState.getAuxiliarySurfaceMode()); @@ -1243,7 +1243,7 @@ HWTEST_F(BufferSetSurfaceTests, givenNonRenderCompressedGmmResourceWhenSurfaceSt buffer->getGraphicsAllocation()->gmm = gmm; gmm->isRenderCompressed = false; - buffer->setArgStateful(&surfaceState); + buffer->setArgStateful(&surfaceState, false); EXPECT_EQ(0u, surfaceState.getAuxiliarySurfaceBaseAddress()); EXPECT_TRUE(AUXILIARY_SURFACE_MODE::AUXILIARY_SURFACE_MODE_AUX_NONE == surfaceState.getAuxiliarySurfaceMode()); diff --git a/unit_tests/mocks/mock_buffer.h b/unit_tests/mocks/mock_buffer.h index f50dbf487c..a717707a1b 100644 --- a/unit_tests/mocks/mock_buffer.h +++ b/unit_tests/mocks/mock_buffer.h @@ -58,7 
+58,7 @@ class MockBuffer : public MockBufferStorage, public Buffer { this->graphicsAllocation = &this->mockGfxAllocation; } } - void setArgStateful(void *memory) override { + void setArgStateful(void *memory, bool forceNonAuxMode) override { Buffer::setSurfaceState(device.get(), memory, getSize(), getCpuAddress(), (externalAlloc != nullptr) ? externalAlloc : &mockGfxAllocation); } GraphicsAllocation *externalAlloc = nullptr; @@ -71,7 +71,7 @@ class AlignedBuffer : public MockBufferStorage, public Buffer { } AlignedBuffer(GraphicsAllocation *gfxAllocation) : MockBufferStorage(), Buffer(nullptr, CL_MEM_USE_HOST_PTR, sizeof(data) / 2, alignUp(&data, 64), alignUp(&data, 64), gfxAllocation, true, false, false) { } - void setArgStateful(void *memory) override { + void setArgStateful(void *memory, bool forceNonAuxMode) override { Buffer::setSurfaceState(device.get(), memory, getSize(), getCpuAddress(), &mockGfxAllocation); } }; @@ -83,7 +83,7 @@ class UnalignedBuffer : public MockBufferStorage, public Buffer { } UnalignedBuffer(GraphicsAllocation *gfxAllocation) : MockBufferStorage(true), Buffer(nullptr, CL_MEM_USE_HOST_PTR, sizeof(data) / 2, alignUp(&data, 4), alignUp(&data, 4), gfxAllocation, false, false, false) { } - void setArgStateful(void *memory) override { + void setArgStateful(void *memory, bool forceNonAuxMode) override { Buffer::setSurfaceState(device.get(), memory, getSize(), getCpuAddress(), &mockGfxAllocation); } }; diff --git a/unit_tests/os_interface/linux/drm_command_stream_tests.cpp b/unit_tests/os_interface/linux/drm_command_stream_tests.cpp index 025249f246..db4ee797d2 100644 --- a/unit_tests/os_interface/linux/drm_command_stream_tests.cpp +++ b/unit_tests/os_interface/linux/drm_command_stream_tests.cpp @@ -1682,7 +1682,7 @@ class DrmMockBuffer : public Buffer { gfxAllocation(alloc) { } - void setArgStateful(void *memory) override { + void setArgStateful(void *memory, bool forceNonAuxMode) override { } protected: