diff --git a/runtime/gtpin/gtpin_callbacks.cpp b/runtime/gtpin/gtpin_callbacks.cpp index 6a185a10db..a9b1c8fc82 100644 --- a/runtime/gtpin/gtpin_callbacks.cpp +++ b/runtime/gtpin/gtpin_callbacks.cpp @@ -138,7 +138,7 @@ void gtpinNotifyKernelSubmit(cl_kernel kernel, void *pCmdQueue) { void *pSurfaceState = gtpinHelper.getSurfaceState(pKernel, gtpinBTI); cl_mem buffer = (cl_mem)resource; auto pBuffer = castToObjectOrAbort(buffer); - pBuffer->setArgStateful(pSurfaceState, false, false); + pBuffer->setArgStateful(pSurfaceState, false, false, false); } } diff --git a/runtime/kernel/kernel.cpp b/runtime/kernel/kernel.cpp index a8a4858bf4..8a1ddab84b 100644 --- a/runtime/kernel/kernel.cpp +++ b/runtime/kernel/kernel.cpp @@ -1187,7 +1187,7 @@ cl_int Kernel::setArgBuffer(uint32_t argIndex, if (requiresSshForBuffers()) { auto surfaceState = ptrOffset(getSurfaceStateHeap(), kernelArgInfo.offsetHeap); - buffer->setArgStateful(surfaceState, forceNonAuxMode, auxTranslationKernel); + buffer->setArgStateful(surfaceState, forceNonAuxMode, auxTranslationKernel, kernelArgInfo.isReadOnly); kernelArguments[argIndex].isUncacheable = buffer->isMemObjUncacheable(); } addAllocationToCacheFlushVector(argIndex, buffer->getGraphicsAllocation()); diff --git a/runtime/mem_obj/buffer.cpp b/runtime/mem_obj/buffer.cpp index f042c76b17..621bd39a8c 100644 --- a/runtime/mem_obj/buffer.cpp +++ b/runtime/mem_obj/buffer.cpp @@ -534,7 +534,7 @@ Buffer *Buffer::createBufferHwFromDevice(const Device *device, return pBuffer; } -uint32_t Buffer::getMocsValue(bool disableL3Cache) const { +uint32_t Buffer::getMocsValue(bool disableL3Cache, bool isReadOnlyArgument) const { uint64_t bufferAddress = 0; size_t bufferSize = 0; if (getGraphicsAllocation()) { @@ -546,7 +546,7 @@ uint32_t Buffer::getMocsValue(bool disableL3Cache) const { } bufferAddress += this->offset; - bool readOnlyMemObj = isValueSet(getFlags(), CL_MEM_READ_ONLY); + bool readOnlyMemObj = isValueSet(getFlags(), CL_MEM_READ_ONLY) || 
isReadOnlyArgument; bool alignedMemObj = isAligned(bufferAddress) && isAligned(bufferSize); @@ -565,7 +565,7 @@ void Buffer::setSurfaceState(const Device *device, GraphicsAllocation *gfxAlloc, cl_mem_flags flags) { auto buffer = Buffer::createBufferHwFromDevice(device, flags, svmSize, svmPtr, svmPtr, gfxAlloc, true, false, false); - buffer->setArgStateful(surfaceState, false, false); + buffer->setArgStateful(surfaceState, false, false, false); buffer->graphicsAllocation = nullptr; delete buffer; } diff --git a/runtime/mem_obj/buffer.h b/runtime/mem_obj/buffer.h index b89757e14b..1dcb45fd3e 100644 --- a/runtime/mem_obj/buffer.h +++ b/runtime/mem_obj/buffer.h @@ -109,7 +109,7 @@ class Buffer : public MemObj { bool isValidSubBufferOffset(size_t offset); uint64_t setArgStateless(void *memory, uint32_t patchSize) { return setArgStateless(memory, patchSize, false); } uint64_t setArgStateless(void *memory, uint32_t patchSize, bool set32BitAddressing); - virtual void setArgStateful(void *memory, bool forceNonAuxMode, bool programForAuxTranslation) = 0; + virtual void setArgStateful(void *memory, bool forceNonAuxMode, bool programForAuxTranslation, bool isReadOnly) = 0; bool bufferRectPitchSet(const size_t *bufferOrigin, const size_t *region, size_t &bufferRowPitch, @@ -124,7 +124,7 @@ class Buffer : public MemObj { bool isReadWriteOnCpuAllowed(cl_bool blocking, cl_uint numEventsInWaitList, void *ptr, size_t size); - uint32_t getMocsValue(bool disableL3Cache) const; + uint32_t getMocsValue(bool disableL3Cache, bool isReadOnlyArgument) const; protected: Buffer(Context *context, @@ -169,8 +169,8 @@ class BufferHw : public Buffer { : Buffer(context, properties, size, memoryStorage, hostPtr, gfxAllocation, zeroCopy, isHostPtrSVM, isObjectRedescribed) {} - void setArgStateful(void *memory, bool forceNonAuxMode, bool programForAuxTranslation) override; - void appendBufferState(void *memory, Context *context, GraphicsAllocation *gfxAllocation); + void setArgStateful(void *memory, 
bool forceNonAuxMode, bool programForAuxTranslation, bool isReadOnlyArgument) override; + void appendBufferState(void *memory, Context *context, GraphicsAllocation *gfxAllocation, bool isReadOnlyArgument); static Buffer *create(Context *context, MemoryProperties properties, diff --git a/runtime/mem_obj/buffer_base.inl b/runtime/mem_obj/buffer_base.inl index c026832783..4c513d8282 100644 --- a/runtime/mem_obj/buffer_base.inl +++ b/runtime/mem_obj/buffer_base.inl @@ -28,7 +28,7 @@ union SURFACE_STATE_BUFFER_LENGTH { }; template -void BufferHw::setArgStateful(void *memory, bool forceNonAuxMode, bool programForAuxTranslation) { +void BufferHw::setArgStateful(void *memory, bool forceNonAuxMode, bool programForAuxTranslation, bool isReadOnlyArgument) { using RENDER_SURFACE_STATE = typename GfxFamily::RENDER_SURFACE_STATE; using SURFACE_FORMAT = typename RENDER_SURFACE_STATE::SURFACE_FORMAT; using AUXILIARY_SURFACE_MODE = typename RENDER_SURFACE_STATE::AUXILIARY_SURFACE_MODE; @@ -62,7 +62,7 @@ void BufferHw::setArgStateful(void *memory, bool forceNonAuxMode, boo surfaceState->setTileMode(RENDER_SURFACE_STATE::TILE_MODE_LINEAR); surfaceState->setVerticalLineStride(0); surfaceState->setVerticalLineStrideOffset(0); - surfaceState->setMemoryObjectControlState(getMocsValue(programForAuxTranslation)); + surfaceState->setMemoryObjectControlState(getMocsValue(programForAuxTranslation, isReadOnlyArgument)); surfaceState->setSurfaceBaseAddress(bufferAddressAligned); Gmm *gmm = graphicsAllocation ? 
graphicsAllocation->getDefaultGmm() : nullptr; @@ -77,7 +77,7 @@ void BufferHw<GfxFamily>::setArgStateful(void *memory, bool forceNonAuxMode, boo surfaceState->setAuxiliarySurfaceMode(AUXILIARY_SURFACE_MODE::AUXILIARY_SURFACE_MODE_AUX_NONE); } - appendBufferState(memory, context, getGraphicsAllocation()); + appendBufferState(memory, context, getGraphicsAllocation(), isReadOnlyArgument); } } // namespace NEO diff --git a/runtime/mem_obj/buffer_bdw_plus.inl b/runtime/mem_obj/buffer_bdw_plus.inl index 1dc2164619..2a2404f14b 100644 --- a/runtime/mem_obj/buffer_bdw_plus.inl +++ b/runtime/mem_obj/buffer_bdw_plus.inl @@ -10,7 +10,7 @@ namespace NEO { template <typename GfxFamily> -void BufferHw<GfxFamily>::appendBufferState(void *memory, Context *context, GraphicsAllocation *gfxAllocation) { +void BufferHw<GfxFamily>::appendBufferState(void *memory, Context *context, GraphicsAllocation *gfxAllocation, bool isReadOnlyArgument) { } } // namespace NEO diff --git a/unit_tests/gen9/kernel_tests_gen9.cpp b/unit_tests/gen9/kernel_tests_gen9.cpp index c25a52f872..b2b2ebe2db 100644 --- a/unit_tests/gen9/kernel_tests_gen9.cpp +++ b/unit_tests/gen9/kernel_tests_gen9.cpp @@ -46,7 +46,7 @@ GEN9TEST_F(Gen9HardwareCommandsTest, givenBufferThatIsNotZeroCopyWhenSurfaceStat auto gmmHelper = context.getDevice(0)->getExecutionEnvironment()->getGmmHelper(); gmmHelper->setSimplifiedMocsTableUsage(true); - buffer->setArgStateful(&surfaceState, false, false); + buffer->setArgStateful(&surfaceState, false, false, false); //make sure proper mocs is selected constexpr uint32_t expectedMocs = GmmHelper::cacheEnabledIndex; EXPECT_EQ(expectedMocs, surfaceState.getMemoryObjectControlStateIndexToMocsTables()); diff --git a/unit_tests/helpers/base_object_tests.cpp b/unit_tests/helpers/base_object_tests.cpp index ad443791a9..03d52f2b67 100644 --- a/unit_tests/helpers/base_object_tests.cpp +++ b/unit_tests/helpers/base_object_tests.cpp @@ -74,7 +74,7 @@ class MockObject : public MockObjectBase {}; template <> class MockObject : public MockObjectBase { public: - void 
setArgStateful(void *memory, bool forceNonAuxMode, bool disableL3Cache) override {} + void setArgStateful(void *memory, bool forceNonAuxMode, bool disableL3Cache, bool isReadOnly) override {} }; template <> @@ -278,7 +278,7 @@ class MockBuffer : public MockBufferStorage, public Buffer { MockBuffer() : MockBufferStorage(), Buffer(nullptr, CL_MEM_USE_HOST_PTR, sizeof(data), &data, &data, &mockGfxAllocation, true, false, false) { } - void setArgStateful(void *memory, bool forceNonAuxMode, bool disableL3Cache) override { + void setArgStateful(void *memory, bool forceNonAuxMode, bool disableL3Cache, bool isReadOnly) override { } }; diff --git a/unit_tests/mem_obj/buffer_set_arg_tests.cpp b/unit_tests/mem_obj/buffer_set_arg_tests.cpp index 4f13fdf946..350ec71d1e 100644 --- a/unit_tests/mem_obj/buffer_set_arg_tests.cpp +++ b/unit_tests/mem_obj/buffer_set_arg_tests.cpp @@ -8,6 +8,7 @@ #include "core/helpers/ptr_math.h" #include "core/unit_tests/helpers/debug_manager_state_restore.h" #include "runtime/gmm_helper/gmm.h" +#include "runtime/gmm_helper/gmm_helper.h" #include "runtime/kernel/kernel.h" #include "runtime/memory_manager/surface.h" #include "runtime/memory_manager/unified_memory_manager.h" @@ -139,6 +140,34 @@ HWTEST_F(BufferSetArgTest, givenSetArgBufferWhenNullArgStatefulThenProgramNullSu EXPECT_EQ(surfacetype, SURFACE_FORMAT::SURFACE_FORMAT_RAW); } +HWTEST_F(BufferSetArgTest, givenSetKernelArgOnReadOnlyBufferThatIsMisalignedWhenSurfaceStateIsSetThenCachingIsOn) { + using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE; + + auto surfaceState = reinterpret_cast<const RENDER_SURFACE_STATE *>( + ptrOffset(pKernel->getSurfaceStateHeap(), + pKernelInfo->kernelArgInfo[0].offsetHeap)); + + pKernelInfo->requiresSshForBuffers = true; + pKernelInfo->kernelArgInfo[0].isReadOnly = true; + + auto graphicsAllocation = castToObject<Buffer>(buffer)->getGraphicsAllocation(); + // Shrink the allocation by one byte; per the test name this is what makes the buffer misaligned. + graphicsAllocation->setSize(graphicsAllocation->getUnderlyingBufferSize() - 1); + + cl_mem clMemBuffer = buffer; + + cl_int ret 
= pKernel->setArgBuffer(0, sizeof(cl_mem), &clMemBuffer); + + EXPECT_EQ(CL_SUCCESS, ret); + + auto mocs = surfaceState->getMemoryObjectControlState(); + auto gmmHelper = pDevice->getGmmHelper(); + auto expectedMocs = gmmHelper->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER); + auto expectedMocs2 = gmmHelper->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER_CONST); + // Accept either OCL buffer MOCS policy (regular or const) for the read-only argument. + EXPECT_TRUE(expectedMocs == mocs || expectedMocs2 == mocs); +} + HWTEST_F(BufferSetArgTest, givenSetArgBufferWithNullArgStatelessThenDontProgramNullSurfaceState) { using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE; using SURFACE_FORMAT = typename RENDER_SURFACE_STATE::SURFACE_FORMAT; diff --git a/unit_tests/mem_obj/buffer_tests.cpp b/unit_tests/mem_obj/buffer_tests.cpp index c9777b4cc4..0cb533e4ed 100644 --- a/unit_tests/mem_obj/buffer_tests.cpp +++ b/unit_tests/mem_obj/buffer_tests.cpp @@ -1675,7 +1675,7 @@ HWTEST_F(BufferSetSurfaceTests, givenBufferSetSurfaceThatAddressIsForcedTo32bitW using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE; RENDER_SURFACE_STATE surfaceState = {}; - buffer->setArgStateful(&surfaceState, false, false); + buffer->setArgStateful(&surfaceState, false, false, false); auto surfBaseAddress = surfaceState.getSurfaceBaseAddress(); auto bufferAddress = buffer->getGraphicsAllocation()->getGpuAddress(); @@ -1710,7 +1710,7 @@ HWTEST_F(BufferSetSurfaceTests, givenBufferWithOffsetWhenSetArgStatefulIsCalledT using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE; RENDER_SURFACE_STATE surfaceState = {}; - subBuffer->setArgStateful(&surfaceState, false, false); + subBuffer->setArgStateful(&surfaceState, false, false, false); auto surfBaseAddress = surfaceState.getSurfaceBaseAddress(); auto bufferAddress = buffer->getGraphicsAllocation()->getGpuAddress(); @@ -1739,7 +1739,7 @@ HWTEST_F(BufferSetSurfaceTests, givenBufferWhenSetArgStatefulWithL3ChacheDisable using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE; 
RENDER_SURFACE_STATE surfaceState = {}; - buffer->setArgStateful(&surfaceState, false, true); + buffer->setArgStateful(&surfaceState, false, true, false); auto mocs = surfaceState.getMemoryObjectControlState(); auto gmmHelper = device->getGmmHelper(); @@ -1748,6 +1748,35 @@ HWTEST_F(BufferSetSurfaceTests, givenBufferWhenSetArgStatefulWithL3ChacheDisable EXPECT_EQ(4u, surfaceState.getHeight()); } +HWTEST_F(BufferSetSurfaceTests, givenBufferThatIsMisalignedButIsAReadOnlyArgumentWhenSurfaceStateIsSetThenL3IsOn) { + MockContext context; + auto size = 128; + auto retVal = CL_SUCCESS; + + auto buffer = std::unique_ptr<Buffer>(Buffer::create( + &context, + CL_MEM_READ_WRITE, + size, + nullptr, + retVal)); + EXPECT_EQ(CL_SUCCESS, retVal); + + using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE; + RENDER_SURFACE_STATE surfaceState = {}; + + // Report an odd allocation size (one byte less than requested) so the buffer is treated as misaligned. + buffer->getGraphicsAllocation()->setSize(127); + + buffer->setArgStateful(&surfaceState, false, false, true); + + auto mocs = surfaceState.getMemoryObjectControlState(); + auto gmmHelper = device->getGmmHelper(); + auto expectedMocs = gmmHelper->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER); + auto expectedMocs2 = gmmHelper->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER_CONST); + // Accept either OCL buffer MOCS policy (regular or const) for the read-only argument. + EXPECT_TRUE(expectedMocs == mocs || expectedMocs2 == mocs); +} + HWTEST_F(BufferSetSurfaceTests, givenAlignedCacheableReadOnlyBufferThenChoseOclBufferPolicy) { MockContext context; const auto size = MemoryConstants::pageSize; @@ -1764,7 +1793,7 @@ HWTEST_F(BufferSetSurfaceTests, givenAlignedCacheableReadOnlyBufferThenChoseOclB EXPECT_EQ(CL_SUCCESS, retVal); typename FamilyType::RENDER_SURFACE_STATE surfaceState = {}; - buffer->setArgStateful(&surfaceState, false, false); + buffer->setArgStateful(&surfaceState, false, false, false); const auto expectedMocs = device->getGmmHelper()->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER); const auto actualMocs = surfaceState.getMemoryObjectControlState(); @@ -1789,7 +1818,7 @@ HWTEST_F(BufferSetSurfaceTests, 
givenAlignedCacheableNonReadOnlyBufferThenChoose EXPECT_EQ(CL_SUCCESS, retVal); typename FamilyType::RENDER_SURFACE_STATE surfaceState = {}; - buffer->setArgStateful(&surfaceState, false, false); + buffer->setArgStateful(&surfaceState, false, false, false); const auto expectedMocs = device->getGmmHelper()->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER); const auto actualMocs = surfaceState.getMemoryObjectControlState(); @@ -1812,14 +1841,14 @@ HWTEST_F(BufferSetSurfaceTests, givenRenderCompressedGmmResourceWhenSurfaceState buffer->getGraphicsAllocation()->setDefaultGmm(gmm); gmm->isRenderCompressed = true; - buffer->setArgStateful(&surfaceState, false, false); + buffer->setArgStateful(&surfaceState, false, false, false); EXPECT_EQ(0u, surfaceState.getAuxiliarySurfaceBaseAddress()); EXPECT_TRUE(AUXILIARY_SURFACE_MODE::AUXILIARY_SURFACE_MODE_AUX_CCS_E == surfaceState.getAuxiliarySurfaceMode()); EXPECT_TRUE(RENDER_SURFACE_STATE::COHERENCY_TYPE_GPU_COHERENT == surfaceState.getCoherencyType()); buffer->getGraphicsAllocation()->setAllocationType(GraphicsAllocation::AllocationType::BUFFER); - buffer->setArgStateful(&surfaceState, false, false); + buffer->setArgStateful(&surfaceState, false, false, false); EXPECT_TRUE(AUXILIARY_SURFACE_MODE::AUXILIARY_SURFACE_MODE_AUX_NONE == surfaceState.getAuxiliarySurfaceMode()); } @@ -1836,7 +1865,7 @@ HWTEST_F(BufferSetSurfaceTests, givenNonRenderCompressedGmmResourceWhenSurfaceSt buffer->getGraphicsAllocation()->setDefaultGmm(gmm); gmm->isRenderCompressed = false; - buffer->setArgStateful(&surfaceState, false, false); + buffer->setArgStateful(&surfaceState, false, false, false); EXPECT_EQ(0u, surfaceState.getAuxiliarySurfaceBaseAddress()); EXPECT_TRUE(AUXILIARY_SURFACE_MODE::AUXILIARY_SURFACE_MODE_AUX_NONE == surfaceState.getAuxiliarySurfaceMode()); diff --git a/unit_tests/mocks/mock_buffer.h b/unit_tests/mocks/mock_buffer.h index 3d7b3ffa8b..7d19ca8e4f 100644 --- a/unit_tests/mocks/mock_buffer.h +++ b/unit_tests/mocks/mock_buffer.h @@ -45,7 
+45,7 @@ class MockBuffer : public MockBufferStorage, public Buffer { this->graphicsAllocation = &this->mockGfxAllocation; } } - void setArgStateful(void *memory, bool forceNonAuxMode, bool disableL3Cache) override { + void setArgStateful(void *memory, bool forceNonAuxMode, bool disableL3Cache, bool isReadOnly) override { Buffer::setSurfaceState(device.get(), memory, getSize(), getCpuAddress(), (externalAlloc != nullptr) ? externalAlloc : &mockGfxAllocation); } GraphicsAllocation *externalAlloc = nullptr; @@ -58,7 +58,7 @@ class AlignedBuffer : public MockBufferStorage, public Buffer { } AlignedBuffer(GraphicsAllocation *gfxAllocation) : MockBufferStorage(), Buffer(nullptr, CL_MEM_USE_HOST_PTR, sizeof(data) / 2, alignUp(&data, 64), alignUp(&data, 64), gfxAllocation, true, false, false) { } - void setArgStateful(void *memory, bool forceNonAuxMode, bool disableL3Cache) override { + void setArgStateful(void *memory, bool forceNonAuxMode, bool disableL3Cache, bool isReadOnly) override { Buffer::setSurfaceState(device.get(), memory, getSize(), getCpuAddress(), &mockGfxAllocation); } }; @@ -70,7 +70,7 @@ class UnalignedBuffer : public MockBufferStorage, public Buffer { } UnalignedBuffer(GraphicsAllocation *gfxAllocation) : MockBufferStorage(true), Buffer(nullptr, CL_MEM_USE_HOST_PTR, sizeof(data) / 2, alignUp(&data, 4), alignUp(&data, 4), gfxAllocation, false, false, false) { } - void setArgStateful(void *memory, bool forceNonAuxMode, bool disableL3Cache) override { + void setArgStateful(void *memory, bool forceNonAuxMode, bool disableL3Cache, bool isReadOnly) override { Buffer::setSurfaceState(device.get(), memory, getSize(), getCpuAddress(), &mockGfxAllocation); } }; diff --git a/unit_tests/os_interface/linux/drm_command_stream_tests.cpp b/unit_tests/os_interface/linux/drm_command_stream_tests.cpp index 06ecfdf33c..95db8cc7ba 100644 --- a/unit_tests/os_interface/linux/drm_command_stream_tests.cpp +++ b/unit_tests/os_interface/linux/drm_command_stream_tests.cpp @@ 
-1498,7 +1498,7 @@ class DrmMockBuffer : public Buffer { gfxAllocation(alloc) { } - void setArgStateful(void *memory, bool forceNonAuxMode, bool disableL3Cache) override { + void setArgStateful(void *memory, bool forceNonAuxMode, bool disableL3Cache, bool isReadOnly) override { } protected: