diff --git a/runtime/built_ins/built_ins.inl b/runtime/built_ins/built_ins.inl index 3a3f1355d9..98466b1b63 100644 --- a/runtime/built_ins/built_ins.inl +++ b/runtime/built_ins/built_ins.inl @@ -64,6 +64,7 @@ void BuiltInOp::resizeKernelInstances(siz for (size_t i = convertToNonAuxKernel.size(); i < size; i++) { auto clonedKernel1 = Kernel::create(baseKernel->getProgram(), baseKernel->getKernelInfo(), nullptr); + clonedKernel1->setDisableL3forStatefulBuffers(true); auto clonedKernel2 = Kernel::create(baseKernel->getProgram(), baseKernel->getKernelInfo(), nullptr); clonedKernel1->cloneKernel(baseKernel); clonedKernel2->cloneKernel(baseKernel); diff --git a/runtime/gtpin/gtpin_callbacks.cpp b/runtime/gtpin/gtpin_callbacks.cpp index 1adb24d124..5781520f59 100644 --- a/runtime/gtpin/gtpin_callbacks.cpp +++ b/runtime/gtpin/gtpin_callbacks.cpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2018 Intel Corporation + * Copyright (C) 2018-2019 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -136,7 +136,7 @@ void gtpinNotifyKernelSubmit(cl_kernel kernel, void *pCmdQueue) { void *pSurfaceState = gtpinHelper.getSurfaceState(pKernel, gtpinBTI); cl_mem buffer = (cl_mem)resource; auto pBuffer = castToObjectOrAbort(buffer); - pBuffer->setArgStateful(pSurfaceState, false); + pBuffer->setArgStateful(pSurfaceState, false, false); } } diff --git a/runtime/kernel/kernel.cpp b/runtime/kernel/kernel.cpp index 485c0b189e..9ac9154410 100644 --- a/runtime/kernel/kernel.cpp +++ b/runtime/kernel/kernel.cpp @@ -1131,7 +1131,7 @@ cl_int Kernel::setArgBuffer(uint32_t argIndex, if (requiresSshForBuffers()) { auto surfaceState = ptrOffset(getSurfaceStateHeap(), kernelArgInfo.offsetHeap); - buffer->setArgStateful(surfaceState, forceNonAuxMode); + buffer->setArgStateful(surfaceState, forceNonAuxMode, disableL3forStatefulBuffers); kernelArguments[argIndex].isUncacheable = buffer->isMemObjUncacheable(); } addAllocationToCacheFlushVector(argIndex, buffer->getGraphicsAllocation()); diff --git 
a/runtime/kernel/kernel.h b/runtime/kernel/kernel.h index ffa6006d61..31028d896a 100644 --- a/runtime/kernel/kernel.h +++ b/runtime/kernel/kernel.h @@ -381,6 +381,10 @@ class Kernel : public BaseObject<_cl_kernel> { using CacheFlushAllocationsVec = StackVec; void getAllocationsForCacheFlush(CacheFlushAllocationsVec &out) const; + void setDisableL3forStatefulBuffers(bool disableL3forStatefulBuffers) { + this->disableL3forStatefulBuffers = disableL3forStatefulBuffers; + } + protected: struct ObjectCounts { uint32_t imageCount; @@ -479,6 +483,8 @@ class Kernel : public BaseObject<_cl_kernel> { std::vector kernelArgHandlers; std::vector kernelSvmGfxAllocations; + bool disableL3forStatefulBuffers = false; + size_t numberOfBindingTableStates; size_t localBindingTableOffset; std::unique_ptr pSshLocal; diff --git a/runtime/mem_obj/buffer.cpp b/runtime/mem_obj/buffer.cpp index e8241ccb13..76b9ad8bec 100644 --- a/runtime/mem_obj/buffer.cpp +++ b/runtime/mem_obj/buffer.cpp @@ -508,7 +508,7 @@ void Buffer::setSurfaceState(const Device *device, GraphicsAllocation *gfxAlloc, cl_mem_flags flags) { auto buffer = Buffer::createBufferHwFromDevice(device, flags, svmSize, svmPtr, svmPtr, gfxAlloc, true, false, false); - buffer->setArgStateful(surfaceState, false); + buffer->setArgStateful(surfaceState, false, false); buffer->graphicsAllocation = nullptr; delete buffer; } diff --git a/runtime/mem_obj/buffer.h b/runtime/mem_obj/buffer.h index a142edae45..a20d86b7eb 100644 --- a/runtime/mem_obj/buffer.h +++ b/runtime/mem_obj/buffer.h @@ -104,7 +104,7 @@ class Buffer : public MemObj { bool isValidSubBufferOffset(size_t offset); uint64_t setArgStateless(void *memory, uint32_t patchSize) { return setArgStateless(memory, patchSize, false); } uint64_t setArgStateless(void *memory, uint32_t patchSize, bool set32BitAddressing); - virtual void setArgStateful(void *memory, bool forceNonAuxMode) = 0; + virtual void setArgStateful(void *memory, bool forceNonAuxMode, bool disableL3Cache) = 0; bool 
bufferRectPitchSet(const size_t *bufferOrigin, const size_t *region, size_t &bufferRowPitch, @@ -162,7 +162,7 @@ class BufferHw : public Buffer { : Buffer(context, flags, size, memoryStorage, hostPtr, gfxAllocation, zeroCopy, isHostPtrSVM, isObjectRedescribed) {} - void setArgStateful(void *memory, bool forceNonAuxMode) override; + void setArgStateful(void *memory, bool forceNonAuxMode, bool disableL3Cache) override; static Buffer *create(Context *context, cl_mem_flags flags, diff --git a/runtime/mem_obj/buffer.inl b/runtime/mem_obj/buffer.inl index f584795012..be2e2459cb 100644 --- a/runtime/mem_obj/buffer.inl +++ b/runtime/mem_obj/buffer.inl @@ -27,7 +27,7 @@ union SURFACE_STATE_BUFFER_LENGTH { }; template -void BufferHw::setArgStateful(void *memory, bool forceNonAuxMode) { +void BufferHw::setArgStateful(void *memory, bool forceNonAuxMode, bool disableL3Cache) { using RENDER_SURFACE_STATE = typename GfxFamily::RENDER_SURFACE_STATE; using SURFACE_FORMAT = typename RENDER_SURFACE_STATE::SURFACE_FORMAT; using AUXILIARY_SURFACE_MODE = typename RENDER_SURFACE_STATE::AUXILIARY_SURFACE_MODE; @@ -65,9 +65,7 @@ void BufferHw::setArgStateful(void *memory, bool forceNonAuxMode) { surfaceState->setTileMode(RENDER_SURFACE_STATE::TILE_MODE_LINEAR); surfaceState->setVerticalLineStride(0); surfaceState->setVerticalLineStrideOffset(0); - if (((isAligned(bufferAddress) && isAligned(bufferSize)) || - isValueSet(getFlags(), CL_MEM_READ_ONLY) || !this->isMemObjZeroCopy()) && - !this->isUncacheable) { + if (!disableL3Cache && ((isAligned(bufferAddress) && isAligned(bufferSize)) || isValueSet(getFlags(), CL_MEM_READ_ONLY) || !this->isMemObjZeroCopy()) && !this->isUncacheable) { surfaceState->setMemoryObjectControlState(gmmHelper->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER)); } else { surfaceState->setMemoryObjectControlState(gmmHelper->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER_CACHELINE_MISALIGNED)); diff --git a/unit_tests/gen9/kernel_tests_gen9.cpp b/unit_tests/gen9/kernel_tests_gen9.cpp 
index 316b72457b..5825cf7b4e 100644 --- a/unit_tests/gen9/kernel_tests_gen9.cpp +++ b/unit_tests/gen9/kernel_tests_gen9.cpp @@ -46,7 +46,7 @@ GEN9TEST_F(Gen9KernelCommandsTest, givenBufferThatIsNotZeroCopyWhenSurfaceStatei auto gmmHelper = context.getDevice(0)->getExecutionEnvironment()->getGmmHelper(); gmmHelper->setSimplifiedMocsTableUsage(true); - buffer->setArgStateful(&surfaceState, false); + buffer->setArgStateful(&surfaceState, false, false); //make sure proper mocs is selected constexpr uint32_t expectedMocs = GmmHelper::cacheEnabledIndex; EXPECT_EQ(expectedMocs, surfaceState.getMemoryObjectControlStateIndexToMocsTables()); diff --git a/unit_tests/helpers/base_object_tests.cpp b/unit_tests/helpers/base_object_tests.cpp index a8dfbe01cc..1f25c601d5 100644 --- a/unit_tests/helpers/base_object_tests.cpp +++ b/unit_tests/helpers/base_object_tests.cpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2017-2018 Intel Corporation + * Copyright (C) 2017-2019 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -73,7 +73,7 @@ class MockObject : public MockObjectBase {}; template <> class MockObject : public MockObjectBase { public: - void setArgStateful(void *memory, bool forceNonAuxMode) override {} + void setArgStateful(void *memory, bool forceNonAuxMode, bool disableL3Cache) override {} }; template <> @@ -277,7 +277,7 @@ class MockBuffer : public MockBufferStorage, public Buffer { MockBuffer() : MockBufferStorage(), Buffer(nullptr, CL_MEM_USE_HOST_PTR, sizeof(data), &data, &data, &mockGfxAllocation, true, false, false) { } - void setArgStateful(void *memory, bool forceNonAuxMode) override { + void setArgStateful(void *memory, bool forceNonAuxMode, bool disableL3Cache) override { } void setFakeOwnership() { diff --git a/unit_tests/mem_obj/buffer_tests.cpp b/unit_tests/mem_obj/buffer_tests.cpp index 0ed669728e..3a44ad33bb 100644 --- a/unit_tests/mem_obj/buffer_tests.cpp +++ b/unit_tests/mem_obj/buffer_tests.cpp @@ -1260,7 +1260,7 @@ HWTEST_F(BufferSetSurfaceTests, 
givenBufferSetSurfaceThatAddressIsForcedTo32bitW using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE; RENDER_SURFACE_STATE surfaceState = {}; - buffer->setArgStateful(&surfaceState, false); + buffer->setArgStateful(&surfaceState, false, false); auto surfBaseAddress = surfaceState.getSurfaceBaseAddress(); auto bufferAddress = buffer->getGraphicsAllocation()->getGpuAddress(); @@ -1295,7 +1295,7 @@ HWTEST_F(BufferSetSurfaceTests, givenBufferWithOffsetWhenSetArgStatefulIsCalledT using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE; RENDER_SURFACE_STATE surfaceState = {}; - subBuffer->setArgStateful(&surfaceState, false); + subBuffer->setArgStateful(&surfaceState, false, false); auto surfBaseAddress = surfaceState.getSurfaceBaseAddress(); auto bufferAddress = buffer->getGraphicsAllocation()->getGpuAddress(); @@ -1308,6 +1308,32 @@ HWTEST_F(BufferSetSurfaceTests, givenBufferWithOffsetWhenSetArgStatefulIsCalledT DebugManager.flags.Force32bitAddressing.set(false); } +HWTEST_F(BufferSetSurfaceTests, givenBufferWhenSetArgStatefulWithL3CacheDisabledIsCalledThenL3CacheShouldBeOff) { + MockContext context; + auto size = MemoryConstants::pageSize; + auto ptr = (void *)alignedMalloc(size * 2, MemoryConstants::pageSize); + auto retVal = CL_SUCCESS; + + auto buffer = std::unique_ptr<Buffer>(Buffer::create( + &context, + CL_MEM_USE_HOST_PTR, + size, + ptr, + retVal)); + EXPECT_EQ(CL_SUCCESS, retVal); + + using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE; + RENDER_SURFACE_STATE surfaceState = {}; + + buffer->setArgStateful(&surfaceState, false, true); + + auto mocs = surfaceState.getMemoryObjectControlState(); + auto gmmHelper = device->getGmmHelper(); + EXPECT_EQ(gmmHelper->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER_CACHELINE_MISALIGNED), mocs); + + alignedFree(ptr); +} + HWTEST_F(BufferSetSurfaceTests, givenRenderCompressedGmmResourceWhenSurfaceStateIsProgrammedThenSetAuxParams) { using RENDER_SURFACE_STATE = typename 
FamilyType::RENDER_SURFACE_STATE; using AUXILIARY_SURFACE_MODE = typename RENDER_SURFACE_STATE::AUXILIARY_SURFACE_MODE; @@ -1322,14 +1348,14 @@ HWTEST_F(BufferSetSurfaceTests, givenRenderCompressedGmmResourceWhenSurfaceState buffer->getGraphicsAllocation()->gmm = gmm; gmm->isRenderCompressed = true; - buffer->setArgStateful(&surfaceState, false); + buffer->setArgStateful(&surfaceState, false, false); EXPECT_EQ(0u, surfaceState.getAuxiliarySurfaceBaseAddress()); EXPECT_TRUE(AUXILIARY_SURFACE_MODE::AUXILIARY_SURFACE_MODE_AUX_CCS_E == surfaceState.getAuxiliarySurfaceMode()); EXPECT_TRUE(RENDER_SURFACE_STATE::COHERENCY_TYPE_GPU_COHERENT == surfaceState.getCoherencyType()); buffer->getGraphicsAllocation()->setAllocationType(GraphicsAllocation::AllocationType::BUFFER); - buffer->setArgStateful(&surfaceState, false); + buffer->setArgStateful(&surfaceState, false, false); EXPECT_TRUE(AUXILIARY_SURFACE_MODE::AUXILIARY_SURFACE_MODE_AUX_NONE == surfaceState.getAuxiliarySurfaceMode()); } @@ -1346,7 +1372,7 @@ HWTEST_F(BufferSetSurfaceTests, givenNonRenderCompressedGmmResourceWhenSurfaceSt buffer->getGraphicsAllocation()->gmm = gmm; gmm->isRenderCompressed = false; - buffer->setArgStateful(&surfaceState, false); + buffer->setArgStateful(&surfaceState, false, false); EXPECT_EQ(0u, surfaceState.getAuxiliarySurfaceBaseAddress()); EXPECT_TRUE(AUXILIARY_SURFACE_MODE::AUXILIARY_SURFACE_MODE_AUX_NONE == surfaceState.getAuxiliarySurfaceMode()); diff --git a/unit_tests/mocks/mock_buffer.h b/unit_tests/mocks/mock_buffer.h index 5ece86ee85..7327649ad7 100644 --- a/unit_tests/mocks/mock_buffer.h +++ b/unit_tests/mocks/mock_buffer.h @@ -44,7 +44,7 @@ class MockBuffer : public MockBufferStorage, public Buffer { this->graphicsAllocation = &this->mockGfxAllocation; } } - void setArgStateful(void *memory, bool forceNonAuxMode) override { + void setArgStateful(void *memory, bool forceNonAuxMode, bool disableL3Cache) override { Buffer::setSurfaceState(device.get(), memory, getSize(), 
getCpuAddress(), (externalAlloc != nullptr) ? externalAlloc : &mockGfxAllocation); } GraphicsAllocation *externalAlloc = nullptr; @@ -57,7 +57,7 @@ class AlignedBuffer : public MockBufferStorage, public Buffer { } AlignedBuffer(GraphicsAllocation *gfxAllocation) : MockBufferStorage(), Buffer(nullptr, CL_MEM_USE_HOST_PTR, sizeof(data) / 2, alignUp(&data, 64), alignUp(&data, 64), gfxAllocation, true, false, false) { } - void setArgStateful(void *memory, bool forceNonAuxMode) override { + void setArgStateful(void *memory, bool forceNonAuxMode, bool disableL3Cache) override { Buffer::setSurfaceState(device.get(), memory, getSize(), getCpuAddress(), &mockGfxAllocation); } }; @@ -69,7 +69,7 @@ class UnalignedBuffer : public MockBufferStorage, public Buffer { } UnalignedBuffer(GraphicsAllocation *gfxAllocation) : MockBufferStorage(true), Buffer(nullptr, CL_MEM_USE_HOST_PTR, sizeof(data) / 2, alignUp(&data, 4), alignUp(&data, 4), gfxAllocation, false, false, false) { } - void setArgStateful(void *memory, bool forceNonAuxMode) override { + void setArgStateful(void *memory, bool forceNonAuxMode, bool disableL3Cache) override { Buffer::setSurfaceState(device.get(), memory, getSize(), getCpuAddress(), &mockGfxAllocation); } }; diff --git a/unit_tests/os_interface/linux/drm_command_stream_tests.cpp b/unit_tests/os_interface/linux/drm_command_stream_tests.cpp index 945c73e477..0a505ec387 100644 --- a/unit_tests/os_interface/linux/drm_command_stream_tests.cpp +++ b/unit_tests/os_interface/linux/drm_command_stream_tests.cpp @@ -1500,7 +1500,7 @@ class DrmMockBuffer : public Buffer { gfxAllocation(alloc) { } - void setArgStateful(void *memory, bool forceNonAuxMode) override { + void setArgStateful(void *memory, bool forceNonAuxMode, bool disableL3Cache) override { } protected: