Turn on caching if buffers are set as constant args.
- If a resource is passed as a constant argument, it will only be read. - Therefore, even for a read_write buffer, we may turn on caching. Change-Id: Id2c34d4993111bf5c6523a1946464c397db95686 Signed-off-by: Mrozek, Michal <michal.mrozek@intel.com>
This commit is contained in:
parent
e57682a8a9
commit
f0d3b47886
|
@ -138,7 +138,7 @@ void gtpinNotifyKernelSubmit(cl_kernel kernel, void *pCmdQueue) {
|
|||
void *pSurfaceState = gtpinHelper.getSurfaceState(pKernel, gtpinBTI);
|
||||
cl_mem buffer = (cl_mem)resource;
|
||||
auto pBuffer = castToObjectOrAbort<Buffer>(buffer);
|
||||
pBuffer->setArgStateful(pSurfaceState, false, false);
|
||||
pBuffer->setArgStateful(pSurfaceState, false, false, false);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -1187,7 +1187,7 @@ cl_int Kernel::setArgBuffer(uint32_t argIndex,
|
|||
|
||||
if (requiresSshForBuffers()) {
|
||||
auto surfaceState = ptrOffset(getSurfaceStateHeap(), kernelArgInfo.offsetHeap);
|
||||
buffer->setArgStateful(surfaceState, forceNonAuxMode, auxTranslationKernel);
|
||||
buffer->setArgStateful(surfaceState, forceNonAuxMode, auxTranslationKernel, kernelArgInfo.isReadOnly);
|
||||
kernelArguments[argIndex].isUncacheable = buffer->isMemObjUncacheable();
|
||||
}
|
||||
addAllocationToCacheFlushVector(argIndex, buffer->getGraphicsAllocation());
|
||||
|
|
|
@ -534,7 +534,7 @@ Buffer *Buffer::createBufferHwFromDevice(const Device *device,
|
|||
return pBuffer;
|
||||
}
|
||||
|
||||
uint32_t Buffer::getMocsValue(bool disableL3Cache) const {
|
||||
uint32_t Buffer::getMocsValue(bool disableL3Cache, bool isReadOnlyArgument) const {
|
||||
uint64_t bufferAddress = 0;
|
||||
size_t bufferSize = 0;
|
||||
if (getGraphicsAllocation()) {
|
||||
|
@ -546,7 +546,7 @@ uint32_t Buffer::getMocsValue(bool disableL3Cache) const {
|
|||
}
|
||||
bufferAddress += this->offset;
|
||||
|
||||
bool readOnlyMemObj = isValueSet(getFlags(), CL_MEM_READ_ONLY);
|
||||
bool readOnlyMemObj = isValueSet(getFlags(), CL_MEM_READ_ONLY) || isReadOnlyArgument;
|
||||
bool alignedMemObj = isAligned<MemoryConstants::cacheLineSize>(bufferAddress) &&
|
||||
isAligned<MemoryConstants::cacheLineSize>(bufferSize);
|
||||
|
||||
|
@ -565,7 +565,7 @@ void Buffer::setSurfaceState(const Device *device,
|
|||
GraphicsAllocation *gfxAlloc,
|
||||
cl_mem_flags flags) {
|
||||
auto buffer = Buffer::createBufferHwFromDevice(device, flags, svmSize, svmPtr, svmPtr, gfxAlloc, true, false, false);
|
||||
buffer->setArgStateful(surfaceState, false, false);
|
||||
buffer->setArgStateful(surfaceState, false, false, false);
|
||||
buffer->graphicsAllocation = nullptr;
|
||||
delete buffer;
|
||||
}
|
||||
|
|
|
@ -109,7 +109,7 @@ class Buffer : public MemObj {
|
|||
bool isValidSubBufferOffset(size_t offset);
|
||||
uint64_t setArgStateless(void *memory, uint32_t patchSize) { return setArgStateless(memory, patchSize, false); }
|
||||
uint64_t setArgStateless(void *memory, uint32_t patchSize, bool set32BitAddressing);
|
||||
virtual void setArgStateful(void *memory, bool forceNonAuxMode, bool programForAuxTranslation) = 0;
|
||||
virtual void setArgStateful(void *memory, bool forceNonAuxMode, bool programForAuxTranslation, bool isReadOnly) = 0;
|
||||
bool bufferRectPitchSet(const size_t *bufferOrigin,
|
||||
const size_t *region,
|
||||
size_t &bufferRowPitch,
|
||||
|
@ -124,7 +124,7 @@ class Buffer : public MemObj {
|
|||
|
||||
bool isReadWriteOnCpuAllowed(cl_bool blocking, cl_uint numEventsInWaitList, void *ptr, size_t size);
|
||||
|
||||
uint32_t getMocsValue(bool disableL3Cache) const;
|
||||
uint32_t getMocsValue(bool disableL3Cache, bool isReadOnlyArgument) const;
|
||||
|
||||
protected:
|
||||
Buffer(Context *context,
|
||||
|
@ -169,8 +169,8 @@ class BufferHw : public Buffer {
|
|||
: Buffer(context, properties, size, memoryStorage, hostPtr, gfxAllocation,
|
||||
zeroCopy, isHostPtrSVM, isObjectRedescribed) {}
|
||||
|
||||
void setArgStateful(void *memory, bool forceNonAuxMode, bool programForAuxTranslation) override;
|
||||
void appendBufferState(void *memory, Context *context, GraphicsAllocation *gfxAllocation);
|
||||
void setArgStateful(void *memory, bool forceNonAuxMode, bool programForAuxTranslation, bool isReadOnlyArgument) override;
|
||||
void appendBufferState(void *memory, Context *context, GraphicsAllocation *gfxAllocation, bool isReadOnlyArgument);
|
||||
|
||||
static Buffer *create(Context *context,
|
||||
MemoryProperties properties,
|
||||
|
|
|
@ -28,7 +28,7 @@ union SURFACE_STATE_BUFFER_LENGTH {
|
|||
};
|
||||
|
||||
template <typename GfxFamily>
|
||||
void BufferHw<GfxFamily>::setArgStateful(void *memory, bool forceNonAuxMode, bool programForAuxTranslation) {
|
||||
void BufferHw<GfxFamily>::setArgStateful(void *memory, bool forceNonAuxMode, bool programForAuxTranslation, bool isReadOnlyArgument) {
|
||||
using RENDER_SURFACE_STATE = typename GfxFamily::RENDER_SURFACE_STATE;
|
||||
using SURFACE_FORMAT = typename RENDER_SURFACE_STATE::SURFACE_FORMAT;
|
||||
using AUXILIARY_SURFACE_MODE = typename RENDER_SURFACE_STATE::AUXILIARY_SURFACE_MODE;
|
||||
|
@ -62,7 +62,7 @@ void BufferHw<GfxFamily>::setArgStateful(void *memory, bool forceNonAuxMode, boo
|
|||
surfaceState->setTileMode(RENDER_SURFACE_STATE::TILE_MODE_LINEAR);
|
||||
surfaceState->setVerticalLineStride(0);
|
||||
surfaceState->setVerticalLineStrideOffset(0);
|
||||
surfaceState->setMemoryObjectControlState(getMocsValue(programForAuxTranslation));
|
||||
surfaceState->setMemoryObjectControlState(getMocsValue(programForAuxTranslation, isReadOnlyArgument));
|
||||
surfaceState->setSurfaceBaseAddress(bufferAddressAligned);
|
||||
|
||||
Gmm *gmm = graphicsAllocation ? graphicsAllocation->getDefaultGmm() : nullptr;
|
||||
|
@ -77,7 +77,7 @@ void BufferHw<GfxFamily>::setArgStateful(void *memory, bool forceNonAuxMode, boo
|
|||
surfaceState->setAuxiliarySurfaceMode(AUXILIARY_SURFACE_MODE::AUXILIARY_SURFACE_MODE_AUX_NONE);
|
||||
}
|
||||
|
||||
appendBufferState(memory, context, getGraphicsAllocation());
|
||||
appendBufferState(memory, context, getGraphicsAllocation(), isReadOnlyArgument);
|
||||
}
|
||||
|
||||
} // namespace NEO
|
||||
|
|
|
@ -10,7 +10,7 @@
|
|||
namespace NEO {
|
||||
|
||||
template <typename GfxFamily>
|
||||
void BufferHw<GfxFamily>::appendBufferState(void *memory, Context *context, GraphicsAllocation *gfxAllocation) {
|
||||
void BufferHw<GfxFamily>::appendBufferState(void *memory, Context *context, GraphicsAllocation *gfxAllocation, bool isReadOnly) {
|
||||
}
|
||||
|
||||
} // namespace NEO
|
||||
|
|
|
@ -46,7 +46,7 @@ GEN9TEST_F(Gen9HardwareCommandsTest, givenBufferThatIsNotZeroCopyWhenSurfaceStat
|
|||
auto gmmHelper = context.getDevice(0)->getExecutionEnvironment()->getGmmHelper();
|
||||
gmmHelper->setSimplifiedMocsTableUsage(true);
|
||||
|
||||
buffer->setArgStateful(&surfaceState, false, false);
|
||||
buffer->setArgStateful(&surfaceState, false, false, false);
|
||||
//make sure proper mocs is selected
|
||||
constexpr uint32_t expectedMocs = GmmHelper::cacheEnabledIndex;
|
||||
EXPECT_EQ(expectedMocs, surfaceState.getMemoryObjectControlStateIndexToMocsTables());
|
||||
|
|
|
@ -74,7 +74,7 @@ class MockObject : public MockObjectBase<BaseType> {};
|
|||
template <>
|
||||
class MockObject<Buffer> : public MockObjectBase<Buffer> {
|
||||
public:
|
||||
void setArgStateful(void *memory, bool forceNonAuxMode, bool disableL3Cache) override {}
|
||||
void setArgStateful(void *memory, bool forceNonAuxMode, bool disableL3Cache, bool isReadOnly) override {}
|
||||
};
|
||||
|
||||
template <>
|
||||
|
@ -278,7 +278,7 @@ class MockBuffer : public MockBufferStorage, public Buffer {
|
|||
MockBuffer() : MockBufferStorage(), Buffer(nullptr, CL_MEM_USE_HOST_PTR, sizeof(data), &data, &data, &mockGfxAllocation, true, false, false) {
|
||||
}
|
||||
|
||||
void setArgStateful(void *memory, bool forceNonAuxMode, bool disableL3Cache) override {
|
||||
void setArgStateful(void *memory, bool forceNonAuxMode, bool disableL3Cache, bool isReadOnly) override {
|
||||
}
|
||||
};
|
||||
|
||||
|
|
|
@ -8,6 +8,7 @@
|
|||
#include "core/helpers/ptr_math.h"
|
||||
#include "core/unit_tests/helpers/debug_manager_state_restore.h"
|
||||
#include "runtime/gmm_helper/gmm.h"
|
||||
#include "runtime/gmm_helper/gmm_helper.h"
|
||||
#include "runtime/kernel/kernel.h"
|
||||
#include "runtime/memory_manager/surface.h"
|
||||
#include "runtime/memory_manager/unified_memory_manager.h"
|
||||
|
@ -139,6 +140,35 @@ HWTEST_F(BufferSetArgTest, givenSetArgBufferWhenNullArgStatefulThenProgramNullSu
|
|||
EXPECT_EQ(surfacetype, SURFACE_FORMAT::SURFACE_FORMAT_RAW);
|
||||
}
|
||||
|
||||
HWTEST_F(BufferSetArgTest, givenSetKernelArgOnReadOnlyBufferThatIsMisalingedWhenSurfaceStateIsSetThenCachingIsOn) {
|
||||
using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE;
|
||||
|
||||
auto surfaceState = reinterpret_cast<const RENDER_SURFACE_STATE *>(
|
||||
ptrOffset(pKernel->getSurfaceStateHeap(),
|
||||
pKernelInfo->kernelArgInfo[0].offsetHeap));
|
||||
|
||||
pKernelInfo->requiresSshForBuffers = true;
|
||||
pKernelInfo->kernelArgInfo[0].isReadOnly = true;
|
||||
|
||||
auto graphicsAllocation = castToObject<Buffer>(buffer)->getGraphicsAllocation();
|
||||
graphicsAllocation->setSize(graphicsAllocation->getUnderlyingBufferSize() - 1);
|
||||
|
||||
cl_mem clMemBuffer = buffer;
|
||||
|
||||
cl_int ret = pKernel->setArgBuffer(0, sizeof(cl_mem), &clMemBuffer);
|
||||
|
||||
EXPECT_EQ(CL_SUCCESS, ret);
|
||||
|
||||
auto mocs = surfaceState->getMemoryObjectControlState();
|
||||
auto gmmHelper = pDevice->getGmmHelper();
|
||||
auto expectedMocs = gmmHelper->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER);
|
||||
auto expectedMocs2 = gmmHelper->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER_CONST);
|
||||
if (expectedMocs != mocs &&
|
||||
expectedMocs2 != mocs) {
|
||||
EXPECT_FALSE(true);
|
||||
}
|
||||
}
|
||||
|
||||
HWTEST_F(BufferSetArgTest, givenSetArgBufferWithNullArgStatelessThenDontProgramNullSurfaceState) {
|
||||
using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE;
|
||||
using SURFACE_FORMAT = typename RENDER_SURFACE_STATE::SURFACE_FORMAT;
|
||||
|
|
|
@ -1675,7 +1675,7 @@ HWTEST_F(BufferSetSurfaceTests, givenBufferSetSurfaceThatAddressIsForcedTo32bitW
|
|||
using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE;
|
||||
RENDER_SURFACE_STATE surfaceState = {};
|
||||
|
||||
buffer->setArgStateful(&surfaceState, false, false);
|
||||
buffer->setArgStateful(&surfaceState, false, false, false);
|
||||
|
||||
auto surfBaseAddress = surfaceState.getSurfaceBaseAddress();
|
||||
auto bufferAddress = buffer->getGraphicsAllocation()->getGpuAddress();
|
||||
|
@ -1710,7 +1710,7 @@ HWTEST_F(BufferSetSurfaceTests, givenBufferWithOffsetWhenSetArgStatefulIsCalledT
|
|||
using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE;
|
||||
RENDER_SURFACE_STATE surfaceState = {};
|
||||
|
||||
subBuffer->setArgStateful(&surfaceState, false, false);
|
||||
subBuffer->setArgStateful(&surfaceState, false, false, false);
|
||||
|
||||
auto surfBaseAddress = surfaceState.getSurfaceBaseAddress();
|
||||
auto bufferAddress = buffer->getGraphicsAllocation()->getGpuAddress();
|
||||
|
@ -1739,7 +1739,7 @@ HWTEST_F(BufferSetSurfaceTests, givenBufferWhenSetArgStatefulWithL3ChacheDisable
|
|||
using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE;
|
||||
RENDER_SURFACE_STATE surfaceState = {};
|
||||
|
||||
buffer->setArgStateful(&surfaceState, false, true);
|
||||
buffer->setArgStateful(&surfaceState, false, true, false);
|
||||
|
||||
auto mocs = surfaceState.getMemoryObjectControlState();
|
||||
auto gmmHelper = device->getGmmHelper();
|
||||
|
@ -1748,6 +1748,36 @@ HWTEST_F(BufferSetSurfaceTests, givenBufferWhenSetArgStatefulWithL3ChacheDisable
|
|||
EXPECT_EQ(4u, surfaceState.getHeight());
|
||||
}
|
||||
|
||||
HWTEST_F(BufferSetSurfaceTests, givenBufferThatIsMisalignedButIsAReadOnlyArgumentWhenSurfaceStateIsSetThenL3IsOn) {
|
||||
MockContext context;
|
||||
auto size = 128;
|
||||
auto retVal = CL_SUCCESS;
|
||||
|
||||
auto buffer = std::unique_ptr<Buffer>(Buffer::create(
|
||||
&context,
|
||||
CL_MEM_READ_WRITE,
|
||||
size,
|
||||
nullptr,
|
||||
retVal));
|
||||
EXPECT_EQ(CL_SUCCESS, retVal);
|
||||
|
||||
using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE;
|
||||
RENDER_SURFACE_STATE surfaceState = {};
|
||||
|
||||
buffer->getGraphicsAllocation()->setSize(127);
|
||||
|
||||
buffer->setArgStateful(&surfaceState, false, false, true);
|
||||
|
||||
auto mocs = surfaceState.getMemoryObjectControlState();
|
||||
auto gmmHelper = device->getGmmHelper();
|
||||
auto expectedMocs = gmmHelper->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER);
|
||||
auto expectedMocs2 = gmmHelper->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER_CONST);
|
||||
if (expectedMocs != mocs &&
|
||||
expectedMocs2 != mocs) {
|
||||
EXPECT_FALSE(true);
|
||||
}
|
||||
}
|
||||
|
||||
HWTEST_F(BufferSetSurfaceTests, givenAlignedCacheableReadOnlyBufferThenChoseOclBufferPolicy) {
|
||||
MockContext context;
|
||||
const auto size = MemoryConstants::pageSize;
|
||||
|
@ -1764,7 +1794,7 @@ HWTEST_F(BufferSetSurfaceTests, givenAlignedCacheableReadOnlyBufferThenChoseOclB
|
|||
EXPECT_EQ(CL_SUCCESS, retVal);
|
||||
|
||||
typename FamilyType::RENDER_SURFACE_STATE surfaceState = {};
|
||||
buffer->setArgStateful(&surfaceState, false, false);
|
||||
buffer->setArgStateful(&surfaceState, false, false, false);
|
||||
|
||||
const auto expectedMocs = device->getGmmHelper()->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER);
|
||||
const auto actualMocs = surfaceState.getMemoryObjectControlState();
|
||||
|
@ -1789,7 +1819,7 @@ HWTEST_F(BufferSetSurfaceTests, givenAlignedCacheableNonReadOnlyBufferThenChoose
|
|||
EXPECT_EQ(CL_SUCCESS, retVal);
|
||||
|
||||
typename FamilyType::RENDER_SURFACE_STATE surfaceState = {};
|
||||
buffer->setArgStateful(&surfaceState, false, false);
|
||||
buffer->setArgStateful(&surfaceState, false, false, false);
|
||||
|
||||
const auto expectedMocs = device->getGmmHelper()->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER);
|
||||
const auto actualMocs = surfaceState.getMemoryObjectControlState();
|
||||
|
@ -1812,14 +1842,14 @@ HWTEST_F(BufferSetSurfaceTests, givenRenderCompressedGmmResourceWhenSurfaceState
|
|||
buffer->getGraphicsAllocation()->setDefaultGmm(gmm);
|
||||
gmm->isRenderCompressed = true;
|
||||
|
||||
buffer->setArgStateful(&surfaceState, false, false);
|
||||
buffer->setArgStateful(&surfaceState, false, false, false);
|
||||
|
||||
EXPECT_EQ(0u, surfaceState.getAuxiliarySurfaceBaseAddress());
|
||||
EXPECT_TRUE(AUXILIARY_SURFACE_MODE::AUXILIARY_SURFACE_MODE_AUX_CCS_E == surfaceState.getAuxiliarySurfaceMode());
|
||||
EXPECT_TRUE(RENDER_SURFACE_STATE::COHERENCY_TYPE_GPU_COHERENT == surfaceState.getCoherencyType());
|
||||
|
||||
buffer->getGraphicsAllocation()->setAllocationType(GraphicsAllocation::AllocationType::BUFFER);
|
||||
buffer->setArgStateful(&surfaceState, false, false);
|
||||
buffer->setArgStateful(&surfaceState, false, false, false);
|
||||
EXPECT_TRUE(AUXILIARY_SURFACE_MODE::AUXILIARY_SURFACE_MODE_AUX_NONE == surfaceState.getAuxiliarySurfaceMode());
|
||||
}
|
||||
|
||||
|
@ -1836,7 +1866,7 @@ HWTEST_F(BufferSetSurfaceTests, givenNonRenderCompressedGmmResourceWhenSurfaceSt
|
|||
buffer->getGraphicsAllocation()->setDefaultGmm(gmm);
|
||||
gmm->isRenderCompressed = false;
|
||||
|
||||
buffer->setArgStateful(&surfaceState, false, false);
|
||||
buffer->setArgStateful(&surfaceState, false, false, false);
|
||||
|
||||
EXPECT_EQ(0u, surfaceState.getAuxiliarySurfaceBaseAddress());
|
||||
EXPECT_TRUE(AUXILIARY_SURFACE_MODE::AUXILIARY_SURFACE_MODE_AUX_NONE == surfaceState.getAuxiliarySurfaceMode());
|
||||
|
|
|
@ -45,7 +45,7 @@ class MockBuffer : public MockBufferStorage, public Buffer {
|
|||
this->graphicsAllocation = &this->mockGfxAllocation;
|
||||
}
|
||||
}
|
||||
void setArgStateful(void *memory, bool forceNonAuxMode, bool disableL3Cache) override {
|
||||
void setArgStateful(void *memory, bool forceNonAuxMode, bool disableL3Cache, bool isReadOnly) override {
|
||||
Buffer::setSurfaceState(device.get(), memory, getSize(), getCpuAddress(), (externalAlloc != nullptr) ? externalAlloc : &mockGfxAllocation);
|
||||
}
|
||||
GraphicsAllocation *externalAlloc = nullptr;
|
||||
|
@ -58,7 +58,7 @@ class AlignedBuffer : public MockBufferStorage, public Buffer {
|
|||
}
|
||||
AlignedBuffer(GraphicsAllocation *gfxAllocation) : MockBufferStorage(), Buffer(nullptr, CL_MEM_USE_HOST_PTR, sizeof(data) / 2, alignUp(&data, 64), alignUp(&data, 64), gfxAllocation, true, false, false) {
|
||||
}
|
||||
void setArgStateful(void *memory, bool forceNonAuxMode, bool disableL3Cache) override {
|
||||
void setArgStateful(void *memory, bool forceNonAuxMode, bool disableL3Cache, bool isReadOnly) override {
|
||||
Buffer::setSurfaceState(device.get(), memory, getSize(), getCpuAddress(), &mockGfxAllocation);
|
||||
}
|
||||
};
|
||||
|
@ -70,7 +70,7 @@ class UnalignedBuffer : public MockBufferStorage, public Buffer {
|
|||
}
|
||||
UnalignedBuffer(GraphicsAllocation *gfxAllocation) : MockBufferStorage(true), Buffer(nullptr, CL_MEM_USE_HOST_PTR, sizeof(data) / 2, alignUp(&data, 4), alignUp(&data, 4), gfxAllocation, false, false, false) {
|
||||
}
|
||||
void setArgStateful(void *memory, bool forceNonAuxMode, bool disableL3Cache) override {
|
||||
void setArgStateful(void *memory, bool forceNonAuxMode, bool disableL3Cache, bool isReadOnly) override {
|
||||
Buffer::setSurfaceState(device.get(), memory, getSize(), getCpuAddress(), &mockGfxAllocation);
|
||||
}
|
||||
};
|
||||
|
|
|
@ -1498,7 +1498,7 @@ class DrmMockBuffer : public Buffer {
|
|||
gfxAllocation(alloc) {
|
||||
}
|
||||
|
||||
void setArgStateful(void *memory, bool forceNonAuxMode, bool disableL3Cache) override {
|
||||
void setArgStateful(void *memory, bool forceNonAuxMode, bool disableL3Cache, bool isReadOnly) override {
|
||||
}
|
||||
|
||||
protected:
|
||||
|
|
Loading…
Reference in New Issue