Add extra parameters to setArgStateful()

Signed-off-by: Igor Venevtsev <igor.venevtsev@intel.com>
This commit is contained in:
Igor Venevtsev
2021-02-04 16:48:27 +00:00
committed by Compute-Runtime-Automation
parent 0871c1bb76
commit 3df6110a17
19 changed files with 91 additions and 83 deletions

View File

@@ -99,7 +99,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelWithParams(z
NEO::EncodeSurfaceState<GfxFamily>::encodeBuffer(surfaceState, debugSurface->getGpuAddress(),
debugSurface->getUnderlyingBufferSize(), mocs,
false, false, false, neoDevice->getNumAvailableDevices(),
debugSurface, neoDevice->getGmmHelper());
debugSurface, neoDevice->getGmmHelper(), kernelImp->getKernelDescriptor().kernelAttributes.flags.useGlobalAtomics, 1u);
}
appendSignalEventPostWalker(hEvent);

View File

@@ -1,5 +1,5 @@
/*
* Copyright (C) 2019-2020 Intel Corporation
* Copyright (C) 2019-2021 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@@ -68,7 +68,8 @@ struct KernelHw : public KernelImp {
NEO::Device *neoDevice = module->getDevice()->getNEODevice();
NEO::EncodeSurfaceState<GfxFamily>::encodeBuffer(surfaceStateAddress, bufferAddressForSsh, bufferSizeForSsh, mocs,
false, false, false, neoDevice->getNumAvailableDevices(),
alloc, neoDevice->getGmmHelper());
alloc, neoDevice->getGmmHelper(),
kernelImmData->getDescriptor().kernelAttributes.flags.useGlobalAtomics, 1u);
}
std::unique_ptr<Kernel> clone() const override {

View File

@@ -1,5 +1,5 @@
/*
* Copyright (C) 2018-2020 Intel Corporation
* Copyright (C) 2018-2021 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@@ -142,7 +142,8 @@ void gtpinNotifyKernelSubmit(cl_kernel kernel, void *pCmdQueue) {
void *pSurfaceState = gtpinHelper.getSurfaceState(pKernel, gtpinBTI, rootDeviceIndex);
cl_mem buffer = (cl_mem)resource;
auto pBuffer = castToObjectOrAbort<Buffer>(buffer);
pBuffer->setArgStateful(pSurfaceState, false, false, false, false, device);
pBuffer->setArgStateful(pSurfaceState, false, false, false, false, device,
pKernel->getDefaultKernelInfo().kernelDescriptor.kernelAttributes.flags.useGlobalAtomics, pContext->getNumDevices());
}
}

View File

@@ -1490,7 +1490,10 @@ cl_int Kernel::setArgBuffer(uint32_t argIndex,
if (requiresSshForBuffers(rootDeviceIndex)) {
auto surfaceState = ptrOffset(getSurfaceStateHeap(rootDeviceIndex), kernelArgInfo.offsetHeap);
buffer->setArgStateful(surfaceState, forceNonAuxMode, disableL3, isAuxTranslationKernel, kernelArgInfo.isReadOnly, pClDevice->getDevice());
auto context = program->getContextPtr();
size_t numDevicesInContext = context ? context->getNumDevices() : 1u;
buffer->setArgStateful(surfaceState, forceNonAuxMode, disableL3, isAuxTranslationKernel, kernelArgInfo.isReadOnly, pClDevice->getDevice(),
getDefaultKernelInfo().kernelDescriptor.kernelAttributes.flags.useGlobalAtomics, numDevicesInContext);
}
kernelArguments[argIndex].isStatelessUncacheable = kernelArgInfo.pureStatefulBufferAccess ? false : buffer->isMemObjUncacheable();

View File

@@ -752,7 +752,7 @@ void Buffer::setSurfaceState(const Device *device,
multiGraphicsAllocation.addAllocation(gfxAlloc);
}
auto buffer = Buffer::createBufferHwFromDevice(device, flags, flagsIntel, svmSize, svmPtr, svmPtr, std::move(multiGraphicsAllocation), offset, true, false, false);
buffer->setArgStateful(surfaceState, forceNonAuxMode, disableL3, false, false, *device);
buffer->setArgStateful(surfaceState, forceNonAuxMode, disableL3, false, false, *device, false, 1u);
delete buffer;
}

View File

@@ -137,7 +137,8 @@ class Buffer : public MemObj {
bool isSubBuffer();
bool isValidSubBufferOffset(size_t offset);
uint64_t setArgStateless(void *memory, uint32_t patchSize, uint32_t rootDeviceIndex, bool set32BitAddressing);
// Pure-virtual hook that programs a surface state for this buffer into `memory`.
// NOTE(review): this text is a diff rendering - the one-line declaration below looks
// like the removed form and the two-line declaration its replacement; confirm against
// the real header before relying on either.
// In the BufferHw implementation shown in this change, `useGlobalAtomics` and
// `numDevicesInContext` are not interpreted by the buffer itself; they are passed
// through unchanged to EncodeSurfaceState::encodeBuffer.
virtual void setArgStateful(void *memory, bool forceNonAuxMode, bool disableL3, bool alignSizeForAuxTranslation, bool isReadOnly, const Device &device) = 0;
virtual void setArgStateful(void *memory, bool forceNonAuxMode, bool disableL3, bool alignSizeForAuxTranslation,
bool isReadOnly, const Device &device, bool useGlobalAtomics, size_t numDevicesInContext) = 0;
bool bufferRectPitchSet(const size_t *bufferOrigin,
const size_t *region,
size_t &bufferRowPitch,
@@ -208,7 +209,8 @@ class BufferHw : public Buffer {
: Buffer(context, memoryProperties, flags, flagsIntel, size, memoryStorage, hostPtr, std::move(multiGraphicsAllocation),
zeroCopy, isHostPtrSVM, isObjectRedescribed) {}
void setArgStateful(void *memory, bool forceNonAuxMode, bool disableL3, bool alignSizeForAuxTranslation, bool isReadOnlyArgument, const Device &device) override;
void setArgStateful(void *memory, bool forceNonAuxMode, bool disableL3, bool alignSizeForAuxTranslation,
bool isReadOnlyArgument, const Device &device, bool useGlobalAtomics, size_t numDevicesInContext) override;
void appendSurfaceStateExt(void *memory);
static Buffer *create(Context *context,

View File

@@ -1,5 +1,5 @@
/*
* Copyright (C) 2019-2020 Intel Corporation
* Copyright (C) 2019-2021 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@@ -34,7 +34,8 @@ union SURFACE_STATE_BUFFER_LENGTH {
};
template <typename GfxFamily>
void BufferHw<GfxFamily>::setArgStateful(void *memory, bool forceNonAuxMode, bool disableL3, bool alignSizeForAuxTranslation, bool isReadOnlyArgument, const Device &device) {
void BufferHw<GfxFamily>::setArgStateful(void *memory, bool forceNonAuxMode, bool disableL3, bool alignSizeForAuxTranslation,
bool isReadOnlyArgument, const Device &device, bool useGlobalAtomics, size_t numDevicesInContext) {
auto rootDeviceIndex = device.getRootDeviceIndex();
auto graphicsAllocation = multiGraphicsAllocation.getGraphicsAllocation(rootDeviceIndex);
const auto isReadOnly = isValueSet(getFlags(), CL_MEM_READ_ONLY) || isReadOnlyArgument;
@@ -42,7 +43,7 @@ void BufferHw<GfxFamily>::setArgStateful(void *memory, bool forceNonAuxMode, boo
getSurfaceSize(alignSizeForAuxTranslation, rootDeviceIndex),
getMocsValue(disableL3, isReadOnly, rootDeviceIndex),
true, forceNonAuxMode, isReadOnly, device.getNumAvailableDevices(),
graphicsAllocation, device.getGmmHelper());
graphicsAllocation, device.getGmmHelper(), useGlobalAtomics, numDevicesInContext);
appendSurfaceStateExt(memory);
}
} // namespace NEO

View File

@@ -1,5 +1,5 @@
/*
* Copyright (C) 2019-2020 Intel Corporation
* Copyright (C) 2019-2021 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@@ -38,7 +38,7 @@ GEN12LPTEST_F(BufferTestsTgllp, givenBufferNotReadonlyWhenProgrammingSurfaceStat
ASSERT_EQ(CL_SUCCESS, retVal);
typename FamilyType::RENDER_SURFACE_STATE surfaceState = {};
buffer->setArgStateful(&surfaceState, false, false, false, false, device->getDevice());
buffer->setArgStateful(&surfaceState, false, false, false, false, device->getDevice(), false, 1u);
const auto expectedMocs = device->getGmmHelper()->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER);
const auto actualMocs = surfaceState.getMemoryObjectControlState();
@@ -55,7 +55,7 @@ GEN12LPTEST_F(BufferTestsTgllp, givenBufferReadonlyWhenProgrammingSurfaceStateTh
ASSERT_EQ(CL_SUCCESS, retVal);
typename FamilyType::RENDER_SURFACE_STATE surfaceState = {};
buffer->setArgStateful(&surfaceState, false, false, false, true, context->getDevice(0)->getDevice());
buffer->setArgStateful(&surfaceState, false, false, false, true, context->getDevice(0)->getDevice(), false, 1u);
const auto expectedMocs = device->getGmmHelper()->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER);
const auto actualMocs = surfaceState.getMemoryObjectControlState();
@@ -73,7 +73,7 @@ GEN12LPTEST_F(BufferTestsTgllp, givenConstantSurfaceWhenProgrammingSurfaceStateT
buffer->getGraphicsAllocation(0)->setAllocationType(GraphicsAllocation::AllocationType::CONSTANT_SURFACE);
typename FamilyType::RENDER_SURFACE_STATE surfaceState = {};
buffer->setArgStateful(&surfaceState, false, false, false, false, context->getDevice(0)->getDevice());
buffer->setArgStateful(&surfaceState, false, false, false, false, context->getDevice(0)->getDevice(), false, 1u);
const auto expectedMocs = device->getGmmHelper()->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER);
const auto actualMocs = surfaceState.getMemoryObjectControlState();
@@ -93,7 +93,7 @@ GEN12LPTEST_F(BufferTestsTgllp, givenL1ForceEnabledWhenProgrammingSurfaceStateTh
ASSERT_EQ(CL_SUCCESS, retVal);
typename FamilyType::RENDER_SURFACE_STATE surfaceState = {};
buffer->setArgStateful(&surfaceState, false, false, false, false, device->getDevice());
buffer->setArgStateful(&surfaceState, false, false, false, false, device->getDevice(), false, 1u);
const auto expectedMocs = device->getGmmHelper()->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER);
const auto actualMocs = surfaceState.getMemoryObjectControlState();
@@ -113,7 +113,7 @@ GEN12LPTEST_F(BufferTestsTgllp, givenBufferReadonlyAndL1ForceEnabledWhenProgramm
ASSERT_EQ(CL_SUCCESS, retVal);
typename FamilyType::RENDER_SURFACE_STATE surfaceState = {};
buffer->setArgStateful(&surfaceState, false, false, false, false, device->getDevice());
buffer->setArgStateful(&surfaceState, false, false, false, false, device->getDevice(), false, 1u);
const auto expectedMocs = device->getGmmHelper()->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER_CONST);
const auto actualMocs = surfaceState.getMemoryObjectControlState();
@@ -133,7 +133,7 @@ GEN12LPTEST_F(BufferTestsTgllp, givenBufferReadonlyL1ForceDisabledWhenProgrammin
ASSERT_EQ(CL_SUCCESS, retVal);
typename FamilyType::RENDER_SURFACE_STATE surfaceState = {};
buffer->setArgStateful(&surfaceState, false, false, false, true, device->getDevice());
buffer->setArgStateful(&surfaceState, false, false, false, true, device->getDevice(), false, 1u);
const auto expectedMocs = device->getGmmHelper()->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER);
const auto actualMocs = surfaceState.getMemoryObjectControlState();

View File

@@ -1,5 +1,5 @@
/*
* Copyright (C) 2019-2020 Intel Corporation
* Copyright (C) 2019-2021 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@@ -38,7 +38,7 @@ GEN12LPTEST_F(BufferTestsTgllp, givenBufferNotReadonlyWhenProgrammingSurfaceStat
ASSERT_EQ(CL_SUCCESS, retVal);
typename FamilyType::RENDER_SURFACE_STATE surfaceState = {};
buffer->setArgStateful(&surfaceState, false, false, false, false, device->getDevice());
buffer->setArgStateful(&surfaceState, false, false, false, false, device->getDevice(), false, 1u);
const auto expectedMocs = device->getGmmHelper()->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER);
const auto actualMocs = surfaceState.getMemoryObjectControlState();
@@ -55,7 +55,7 @@ GEN12LPTEST_F(BufferTestsTgllp, givenBufferReadonlyWhenProgrammingSurfaceStateTh
ASSERT_EQ(CL_SUCCESS, retVal);
typename FamilyType::RENDER_SURFACE_STATE surfaceState = {};
buffer->setArgStateful(&surfaceState, false, false, false, true, context->getDevice(0)->getDevice());
buffer->setArgStateful(&surfaceState, false, false, false, true, context->getDevice(0)->getDevice(), false, 1u);
const auto expectedMocs = device->getGmmHelper()->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER_CONST);
const auto actualMocs = surfaceState.getMemoryObjectControlState();
@@ -73,7 +73,7 @@ GEN12LPTEST_F(BufferTestsTgllp, givenConstantSurfaceWhenProgrammingSurfaceStateT
buffer->getGraphicsAllocation(0)->setAllocationType(GraphicsAllocation::AllocationType::CONSTANT_SURFACE);
typename FamilyType::RENDER_SURFACE_STATE surfaceState = {};
buffer->setArgStateful(&surfaceState, false, false, false, false, context->getDevice(0)->getDevice());
buffer->setArgStateful(&surfaceState, false, false, false, false, context->getDevice(0)->getDevice(), false, 1u);
const auto expectedMocs = device->getGmmHelper()->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER_CONST);
const auto actualMocs = surfaceState.getMemoryObjectControlState();
@@ -93,7 +93,7 @@ GEN12LPTEST_F(BufferTestsTgllp, givenL1ForceEnabledWhenProgrammingSurfaceStateTh
ASSERT_EQ(CL_SUCCESS, retVal);
typename FamilyType::RENDER_SURFACE_STATE surfaceState = {};
buffer->setArgStateful(&surfaceState, false, false, false, false, device->getDevice());
buffer->setArgStateful(&surfaceState, false, false, false, false, device->getDevice(), false, 1u);
const auto expectedMocs = device->getGmmHelper()->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER_CONST);
const auto actualMocs = surfaceState.getMemoryObjectControlState();
@@ -113,7 +113,7 @@ GEN12LPTEST_F(BufferTestsTgllp, givenBufferReadonlyL1ForceDisabledWhenProgrammin
ASSERT_EQ(CL_SUCCESS, retVal);
typename FamilyType::RENDER_SURFACE_STATE surfaceState = {};
buffer->setArgStateful(&surfaceState, false, false, false, true, device->getDevice());
buffer->setArgStateful(&surfaceState, false, false, false, true, device->getDevice(), false, 1u);
const auto expectedMocs = device->getGmmHelper()->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER);
const auto actualMocs = surfaceState.getMemoryObjectControlState();

View File

@@ -1,5 +1,5 @@
/*
* Copyright (C) 2017-2020 Intel Corporation
* Copyright (C) 2017-2021 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@@ -80,7 +80,7 @@ class MockObject : public MockObjectBase<BaseType> {};
// Specialization of MockObject for Buffer. Its only addition over MockObjectBase<Buffer>
// is an empty setArgStateful override, so calling it programs nothing.
// Presumably this exists because Buffer declares setArgStateful pure virtual and the
// mock must be concrete - TODO confirm against MockObjectBase.
// NOTE(review): diff rendering - the first override line appears to be the removed
// signature, the second (with useGlobalAtomics / numDevicesInContext) its replacement.
template <>
class MockObject<Buffer> : public MockObjectBase<Buffer> {
public:
void setArgStateful(void *memory, bool forceNonAuxMode, bool disableL3, bool alignSizeForAuxTranslation, bool isReadOnly, const Device &device) override {}
void setArgStateful(void *memory, bool forceNonAuxMode, bool disableL3, bool alignSizeForAuxTranslation, bool isReadOnly, const Device &device, bool useGlobalAtomics, size_t numDevicesInContext) override {}
};
template <>
@@ -295,7 +295,7 @@ class MockBuffer : public MockBufferStorage, public Buffer {
CL_MEM_USE_HOST_PTR, 0, sizeof(data), &data, &data, GraphicsAllocationHelper::toMultiGraphicsAllocation(&mockGfxAllocation), true, false, false) {
}
// Intentionally empty override: this mock ignores every argument and leaves `memory`
// untouched.
// NOTE(review): diff rendering - the first signature line appears to be the removed
// form, the second (two extra trailing parameters) its replacement.
void setArgStateful(void *memory, bool forceNonAuxMode, bool disableL3, bool alignSizeForAuxTranslation, bool isReadOnly, const Device &device) override {
void setArgStateful(void *memory, bool forceNonAuxMode, bool disableL3, bool alignSizeForAuxTranslation, bool isReadOnly, const Device &device, bool useGlobalAtomics, size_t numDevicesInContext) override {
}
};

View File

@@ -1382,7 +1382,7 @@ HWTEST_F(BufferSetSurfaceTests, givenBufferSetSurfaceThatAddressIsForcedTo32bitW
using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE;
RENDER_SURFACE_STATE surfaceState = {};
buffer->setArgStateful(&surfaceState, false, false, false, false, context.getDevice(0)->getDevice());
buffer->setArgStateful(&surfaceState, false, false, false, false, context.getDevice(0)->getDevice(), false, 1u);
auto surfBaseAddress = surfaceState.getSurfaceBaseAddress();
auto bufferAddress = buffer->getGraphicsAllocation(rootDeviceIndex)->getGpuAddress();
@@ -1418,7 +1418,7 @@ HWTEST_F(BufferSetSurfaceTests, givenBufferWithOffsetWhenSetArgStatefulIsCalledT
using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE;
RENDER_SURFACE_STATE surfaceState = {};
subBuffer->setArgStateful(&surfaceState, false, false, false, false, context.getDevice(0)->getDevice());
subBuffer->setArgStateful(&surfaceState, false, false, false, false, context.getDevice(0)->getDevice(), false, 1u);
auto surfBaseAddress = surfaceState.getSurfaceBaseAddress();
auto bufferAddress = buffer->getGraphicsAllocation(rootDeviceIndex)->getGpuAddress();
@@ -1447,7 +1447,7 @@ HWTEST_F(BufferSetSurfaceTests, givenBufferWhenSetArgStatefulWithL3ChacheDisable
using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE;
RENDER_SURFACE_STATE surfaceState = {};
buffer->setArgStateful(&surfaceState, false, true, true, false, context.getDevice(0)->getDevice());
buffer->setArgStateful(&surfaceState, false, true, true, false, context.getDevice(0)->getDevice(), false, 1u);
auto mocs = surfaceState.getMemoryObjectControlState();
auto gmmHelper = device->getGmmHelper();
@@ -1475,7 +1475,7 @@ HWTEST_F(BufferSetSurfaceTests, givenBufferThatIsMisalignedButIsAReadOnlyArgumen
buffer->getGraphicsAllocation(rootDeviceIndex)->setSize(127);
buffer->setArgStateful(&surfaceState, false, false, false, true, context.getDevice(0)->getDevice());
buffer->setArgStateful(&surfaceState, false, false, false, true, context.getDevice(0)->getDevice(), false, 1u);
auto mocs = surfaceState.getMemoryObjectControlState();
auto gmmHelper = device->getGmmHelper();
@@ -1500,7 +1500,7 @@ HWTEST_F(BufferSetSurfaceTests, givenAlignedCacheableReadOnlyBufferThenChoseOclB
EXPECT_EQ(CL_SUCCESS, retVal);
typename FamilyType::RENDER_SURFACE_STATE surfaceState = {};
buffer->setArgStateful(&surfaceState, false, false, false, false, context.getDevice(0)->getDevice());
buffer->setArgStateful(&surfaceState, false, false, false, false, context.getDevice(0)->getDevice(), false, 1u);
const auto expectedMocs = device->getGmmHelper()->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER);
const auto actualMocs = surfaceState.getMemoryObjectControlState();
@@ -1525,7 +1525,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, BufferSetSurfaceTests, givenAlignedCacheableNonReadO
EXPECT_EQ(CL_SUCCESS, retVal);
typename FamilyType::RENDER_SURFACE_STATE surfaceState = {};
buffer->setArgStateful(&surfaceState, false, false, false, false, context.getDevice(0)->getDevice());
buffer->setArgStateful(&surfaceState, false, false, false, false, context.getDevice(0)->getDevice(), false, 1u);
const auto expectedMocs = device->getGmmHelper()->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER);
const auto actualMocs = surfaceState.getMemoryObjectControlState();
@@ -1550,7 +1550,7 @@ HWTEST_F(BufferSetSurfaceTests, givenRenderCompressedGmmResourceWhenSurfaceState
graphicsAllocation->setDefaultGmm(gmm);
gmm->isRenderCompressed = true;
buffer->setArgStateful(&surfaceState, false, false, false, false, context.getDevice(0)->getDevice());
buffer->setArgStateful(&surfaceState, false, false, false, false, context.getDevice(0)->getDevice(), false, 1u);
EXPECT_EQ(0u, surfaceState.getAuxiliarySurfaceBaseAddress());
EXPECT_TRUE(EncodeSurfaceState<FamilyType>::isAuxModeEnabled(&surfaceState, gmm));
@@ -1571,7 +1571,7 @@ HWTEST_F(BufferSetSurfaceTests, givenNonRenderCompressedGmmResourceWhenSurfaceSt
buffer->getGraphicsAllocation(rootDeviceIndex)->setDefaultGmm(gmm);
gmm->isRenderCompressed = false;
buffer->setArgStateful(&surfaceState, false, false, false, false, context.getDevice(0)->getDevice());
buffer->setArgStateful(&surfaceState, false, false, false, false, context.getDevice(0)->getDevice(), false, 1u);
EXPECT_EQ(0u, surfaceState.getAuxiliarySurfaceBaseAddress());
EXPECT_TRUE(AUXILIARY_SURFACE_MODE::AUXILIARY_SURFACE_MODE_AUX_NONE == surfaceState.getAuxiliarySurfaceMode());

View File

@@ -58,7 +58,7 @@ class MockBuffer : public MockBufferStorage, public Buffer {
this->multiGraphicsAllocation.addAllocation(&this->mockGfxAllocation);
}
}
// Programs the surface state at `memory` by delegating to Buffer::setSurfaceState.
// Only `memory`, `forceNonAuxMode` and `disableL3` are forwarded; the `device` argument
// is not used (the mock's own this->device is passed instead), and
// alignSizeForAuxTranslation, isReadOnly, useGlobalAtomics and numDevicesInContext are
// ignored. The allocation handed over is externalAlloc when it is non-null, otherwise
// the mock's built-in mockGfxAllocation.
// NOTE(review): diff rendering - the first signature line appears to be the removed
// form, the second its replacement.
void setArgStateful(void *memory, bool forceNonAuxMode, bool disableL3, bool alignSizeForAuxTranslation, bool isReadOnly, const Device &device) override {
void setArgStateful(void *memory, bool forceNonAuxMode, bool disableL3, bool alignSizeForAuxTranslation, bool isReadOnly, const Device &device, bool useGlobalAtomics, size_t numDevicesInContext) override {
Buffer::setSurfaceState(this->device.get(), memory, forceNonAuxMode, disableL3, getSize(), getCpuAddress(), 0, (externalAlloc != nullptr) ? externalAlloc : &mockGfxAllocation, 0, 0);
}
GraphicsAllocation *externalAlloc = nullptr;
@@ -79,7 +79,7 @@ class AlignedBuffer : public MockBufferStorage, public Buffer {
CL_MEM_USE_HOST_PTR, 0, sizeof(data) / 2, alignUp(&data, 64), alignUp(&data, 64),
GraphicsAllocationHelper::toMultiGraphicsAllocation(gfxAllocation), true, false, false) {
}
// Programs the surface state at `memory` via Buffer::setSurfaceState, always using the
// mock's own device (this->device) and its mockGfxAllocation. Of the parameters, only
// `memory`, `forceNonAuxMode` and `disableL3` reach the helper; the rest, including
// useGlobalAtomics and numDevicesInContext, are ignored.
// NOTE(review): diff rendering - the first signature line appears to be the removed
// form, the second its replacement.
void setArgStateful(void *memory, bool forceNonAuxMode, bool disableL3, bool alignSizeForAuxTranslation, bool isReadOnly, const Device &device) override {
void setArgStateful(void *memory, bool forceNonAuxMode, bool disableL3, bool alignSizeForAuxTranslation, bool isReadOnly, const Device &device, bool useGlobalAtomics, size_t numDevicesInContext) override {
Buffer::setSurfaceState(this->device.get(), memory, forceNonAuxMode, disableL3, getSize(), getCpuAddress(), 0, &mockGfxAllocation, 0, 0);
}
};
@@ -99,7 +99,7 @@ class UnalignedBuffer : public MockBufferStorage, public Buffer {
CL_MEM_USE_HOST_PTR, 0, sizeof(data) / 2, alignUp(&data, 4), alignUp(&data, 4),
GraphicsAllocationHelper::toMultiGraphicsAllocation(gfxAllocation), false, false, false) {
}
// Programs the surface state at `memory` via Buffer::setSurfaceState with the mock's
// own device (this->device) and mockGfxAllocation. Only `memory`, `forceNonAuxMode`
// and `disableL3` are forwarded; every other parameter, including the `device`
// argument, useGlobalAtomics and numDevicesInContext, is unused here.
// NOTE(review): diff rendering - the first signature line appears to be the removed
// form, the second its replacement.
void setArgStateful(void *memory, bool forceNonAuxMode, bool disableL3, bool alignSizeForAuxTranslation, bool isReadOnly, const Device &device) override {
void setArgStateful(void *memory, bool forceNonAuxMode, bool disableL3, bool alignSizeForAuxTranslation, bool isReadOnly, const Device &device, bool useGlobalAtomics, size_t numDevicesInContext) override {
Buffer::setSurfaceState(this->device.get(), memory, forceNonAuxMode, disableL3, getSize(), getCpuAddress(), 0, &mockGfxAllocation, 0, 0);
}
};

View File

@@ -1489,7 +1489,7 @@ class DrmMockBuffer : public MockBufferStorage, public Buffer {
gfxAllocation(alloc) {
}
// Intentionally empty override: no surface state is written and all arguments are
// ignored.
// NOTE(review): diff rendering - the first signature line appears to be the removed
// form, the second (two extra trailing parameters) its replacement.
void setArgStateful(void *memory, bool forceNonAuxMode, bool disableL3, bool alignSizeForAuxTranslation, bool isReadOnly, const Device &device) override {
void setArgStateful(void *memory, bool forceNonAuxMode, bool disableL3, bool alignSizeForAuxTranslation, bool isReadOnly, const Device &device, bool useGlobalAtomics, size_t numDevicesInContext) override {
}
protected:

View File

@@ -230,9 +230,9 @@ struct EncodeSurfaceState {
static void encodeBuffer(void *dst, uint64_t address, size_t size, uint32_t mocs,
bool cpuCoherent, bool forceNonAuxMode, bool isReadOnly, uint32_t numAvailableDevices,
GraphicsAllocation *allocation, GmmHelper *gmmHelper);
GraphicsAllocation *allocation, GmmHelper *gmmHelper, bool useGlobalAtomics, size_t numDevicesInContext);
static void encodeExtraBufferParams(R_SURFACE_STATE *surfaceState, GraphicsAllocation *allocation, GmmHelper *gmmHelper,
bool isReadOnly, uint32_t numAvailableDevices);
bool isReadOnly, uint32_t numAvailableDevices, bool useGlobalAtomics, size_t numDevicesInContext);
static void encodeExtraCacheSettings(R_SURFACE_STATE *surfaceState, const HardwareInfo &hwInfo);
static constexpr uintptr_t getSurfaceBaseAddressAlignmentMask() {

View File

@@ -318,7 +318,7 @@ void EncodeStoreMMIO<Family>::encode(LinearStream &csr, uint32_t offset, uint64_
template <typename Family>
void EncodeSurfaceState<Family>::encodeBuffer(void *dst, uint64_t address, size_t size, uint32_t mocs,
bool cpuCoherent, bool forceNonAuxMode, bool isReadOnly, uint32_t numAvailableDevices,
GraphicsAllocation *allocation, GmmHelper *gmmHelper) {
GraphicsAllocation *allocation, GmmHelper *gmmHelper, bool useGlobalAtomics, size_t numDevicesInContext) {
auto surfaceState = reinterpret_cast<R_SURFACE_STATE *>(dst);
UNRECOVERABLE_IF(!isAligned<getSurfaceBaseAddressMinimumAlignment()>(size));
@@ -356,7 +356,7 @@ void EncodeSurfaceState<Family>::encodeBuffer(void *dst, uint64_t address, size_
surfaceState->setMemoryObjectControlState(gmmHelper->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER_CACHELINE_MISALIGNED));
}
EncodeSurfaceState<Family>::encodeExtraBufferParams(surfaceState, allocation, gmmHelper, isReadOnly, numAvailableDevices);
EncodeSurfaceState<Family>::encodeExtraBufferParams(surfaceState, allocation, gmmHelper, isReadOnly, numAvailableDevices, useGlobalAtomics, numDevicesInContext);
}
template <typename Family>

View File

@@ -400,7 +400,7 @@ inline size_t EncodeWA<GfxFamily>::getAdditionalPipelineSelectSize(Device &devic
// Generic (non-specialized) implementation of the extra buffer surface-state encoding.
// It only calls encodeExtraCacheSettings with the hardware info obtained from
// `gmmHelper`; `allocation`, `isReadOnly`, `numAvailableDevices`, `useGlobalAtomics`
// and `numDevicesInContext` are accepted but not read in this body.
// `gmmHelper` is dereferenced unconditionally, so it must not be null.
// NOTE(review): diff rendering - of the two parameter-continuation lines below, the
// first appears to be the removed form and the second its replacement.
template <typename GfxFamily>
void EncodeSurfaceState<GfxFamily>::encodeExtraBufferParams(R_SURFACE_STATE *surfaceState, GraphicsAllocation *allocation, GmmHelper *gmmHelper,
bool isReadOnly, uint32_t numAvailableDevices) {
bool isReadOnly, uint32_t numAvailableDevices, bool useGlobalAtomics, size_t numDevicesInContext) {
encodeExtraCacheSettings(surfaceState, *gmmHelper->getHardwareInfo());
}

View File

@@ -51,39 +51,39 @@ constexpr uint32_t NotApplicable = 3u;
struct DispatchFlags {
DispatchFlags() = delete;
// Constructs DispatchFlags by copying every argument into a member; the body is empty.
// In this form each parameter has exactly the same name as the member it initializes
// (e.g. csrDependencies(csrDependencies)): inside a mem-initializer the name before the
// parentheses denotes the member and the expression inside resolves to the parameter,
// so this is well-formed, but the parameters shadow the members.
// The one exception is `usePerDSSbackedBuffer`, which initializes the differently-cased
// member `usePerDssBackedBuffer`.
// NOTE(review): diff rendering - this appears to be the removed version of the
// constructor; confirm against the real header.
DispatchFlags(CsrDependencies csrDependencies, TimestampPacketContainer *barrierTimestampPacketNodes, PipelineSelectArgs pipelineSelectArgs,
FlushStampTrackingObj *flushStampReference, QueueThrottle throttle, PreemptionMode preemptionMode, uint32_t numGrfRequired,
uint32_t l3CacheSettings, uint32_t threadArbitrationPolicy, uint32_t additionalKernelExecInfo,
KernelExecutionType kernelExecutionType, MemoryCompressionState memoryCompressionState,
uint64_t sliceCount, bool blocking, bool dcFlush, bool useSLM, bool guardCommandBufferWithPipeControl, bool gsba32BitRequired,
bool requiresCoherency, bool lowPriority, bool implicitFlush, bool outOfOrderExecutionAllowed, bool epilogueRequired,
bool usePerDSSbackedBuffer, bool useSingleSubdevice, bool useGlobalAtomics, size_t numDevicesInContext) : csrDependencies(csrDependencies),
barrierTimestampPacketNodes(barrierTimestampPacketNodes),
pipelineSelectArgs(pipelineSelectArgs),
flushStampReference(flushStampReference),
throttle(throttle),
preemptionMode(preemptionMode),
numGrfRequired(numGrfRequired),
l3CacheSettings(l3CacheSettings),
threadArbitrationPolicy(threadArbitrationPolicy),
additionalKernelExecInfo(additionalKernelExecInfo),
kernelExecutionType(kernelExecutionType),
memoryCompressionState(memoryCompressionState),
sliceCount(sliceCount),
blocking(blocking),
dcFlush(dcFlush),
useSLM(useSLM),
guardCommandBufferWithPipeControl(guardCommandBufferWithPipeControl),
gsba32BitRequired(gsba32BitRequired),
requiresCoherency(requiresCoherency),
lowPriority(lowPriority),
implicitFlush(implicitFlush),
outOfOrderExecutionAllowed(outOfOrderExecutionAllowed),
epilogueRequired(epilogueRequired),
usePerDssBackedBuffer(usePerDSSbackedBuffer),
useSingleSubdevice(useSingleSubdevice),
useGlobalAtomics(useGlobalAtomics),
numDevicesInContext(numDevicesInContext){};
// Constructs DispatchFlags by copying every argument into the corresponding member; the
// body is empty. Each parameter carries a trailing `P` so that it no longer has the
// same name as the member it initializes (member `throttle` <- parameter `throttleP`),
// which removes parameter/member shadowing.
// Parameter order and types are what callers depend on; the names are local to this
// definition.
// Note the casing difference: parameter `usePerDSSbackedBufferP` initializes the member
// `usePerDssBackedBuffer`.
DispatchFlags(CsrDependencies csrDependenciesP, TimestampPacketContainer *barrierTimestampPacketNodesP, PipelineSelectArgs pipelineSelectArgsP,
FlushStampTrackingObj *flushStampReferenceP, QueueThrottle throttleP, PreemptionMode preemptionModeP, uint32_t numGrfRequiredP,
uint32_t l3CacheSettingsP, uint32_t threadArbitrationPolicyP, uint32_t additionalKernelExecInfoP,
KernelExecutionType kernelExecutionTypeP, MemoryCompressionState memoryCompressionStateP,
uint64_t sliceCountP, bool blockingP, bool dcFlushP, bool useSLMP, bool guardCommandBufferWithPipeControlP, bool gsba32BitRequiredP,
bool requiresCoherencyP, bool lowPriorityP, bool implicitFlushP, bool outOfOrderExecutionAllowedP, bool epilogueRequiredP,
bool usePerDSSbackedBufferP, bool useSingleSubdeviceP, bool useGlobalAtomicsP, size_t numDevicesInContextP) : csrDependencies(csrDependenciesP),
barrierTimestampPacketNodes(barrierTimestampPacketNodesP),
pipelineSelectArgs(pipelineSelectArgsP),
flushStampReference(flushStampReferenceP),
throttle(throttleP),
preemptionMode(preemptionModeP),
numGrfRequired(numGrfRequiredP),
l3CacheSettings(l3CacheSettingsP),
threadArbitrationPolicy(threadArbitrationPolicyP),
additionalKernelExecInfo(additionalKernelExecInfoP),
kernelExecutionType(kernelExecutionTypeP),
memoryCompressionState(memoryCompressionStateP),
sliceCount(sliceCountP),
blocking(blockingP),
dcFlush(dcFlushP),
useSLM(useSLMP),
guardCommandBufferWithPipeControl(guardCommandBufferWithPipeControlP),
gsba32BitRequired(gsba32BitRequiredP),
requiresCoherency(requiresCoherencyP),
lowPriority(lowPriorityP),
implicitFlush(implicitFlushP),
outOfOrderExecutionAllowed(outOfOrderExecutionAllowedP),
epilogueRequired(epilogueRequiredP),
usePerDssBackedBuffer(usePerDSSbackedBufferP),
useSingleSubdevice(useSingleSubdeviceP),
useGlobalAtomics(useGlobalAtomicsP),
numDevicesInContext(numDevicesInContextP){};
CsrDependencies csrDependencies;
TimestampPacketContainer *barrierTimestampPacketNodes = nullptr;

View File

@@ -52,7 +52,7 @@ void EncodeWA<Family>::encodeAdditionalPipelineSelect(Device &device, LinearStre
template <>
void EncodeSurfaceState<Family>::encodeExtraBufferParams(R_SURFACE_STATE *surfaceState, GraphicsAllocation *allocation, GmmHelper *gmmHelper,
bool isReadOnly, uint32_t numAvailableDevices) {
bool isReadOnly, uint32_t numAvailableDevices, bool useGlobalAtomics, size_t numDevicesInContext) {
const bool isL3Allowed = surfaceState->getMemoryObjectControlState() == gmmHelper->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER);
if (isL3Allowed) {
const bool isConstantSurface = allocation && allocation->getAllocationType() == GraphicsAllocation::AllocationType::CONSTANT_SURFACE;

View File

@@ -1,5 +1,5 @@
/*
* Copyright (C) 2020 Intel Corporation
* Copyright (C) 2020-2021 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@@ -155,7 +155,7 @@ HWTEST_F(CommandEncodeStatesTest, givenCreatedSurfaceStateBufferWhenAllocationPr
GraphicsAllocation allocation(0, GraphicsAllocation::AllocationType::UNKNOWN, cpuAddr, gpuAddr, 0u, allocSize, MemoryPool::MemoryNull, 1);
EncodeSurfaceState<FamilyType>::encodeBuffer(stateBuffer, gpuAddr, allocSize, 1,
false, false, false, 1u,
&allocation, pDevice->getGmmHelper());
&allocation, pDevice->getGmmHelper(), false, 1u);
EXPECT_EQ(length.SurfaceState.Depth + 1u, state->getDepth());
EXPECT_EQ(length.SurfaceState.Width + 1u, state->getWidth());
EXPECT_EQ(length.SurfaceState.Height + 1u, state->getHeight());
@@ -182,7 +182,7 @@ HWTEST_F(CommandEncodeStatesTest, givenCreatedSurfaceStateBufferWhenAllocationNo
EncodeSurfaceState<FamilyType>::encodeBuffer(stateBuffer, gpuAddr, allocSize, 1,
true, false, false, 1u,
nullptr, pDevice->getGmmHelper());
nullptr, pDevice->getGmmHelper(), false, 1u);
EXPECT_EQ(RENDER_SURFACE_STATE::SURFACE_TYPE_SURFTYPE_NULL, state->getSurfaceType());
EXPECT_EQ(RENDER_SURFACE_STATE::COHERENCY_TYPE_IA_COHERENT, state->getCoherencyType());
@@ -208,7 +208,7 @@ HWTEST_F(CommandEncodeStatesTest, givenCreatedSurfaceStateBufferWhenGpuCoherency
EncodeSurfaceState<FamilyType>::encodeBuffer(stateBuffer, gpuAddr, allocSize, 1,
false, false, false, 1u,
nullptr, pDevice->getGmmHelper());
nullptr, pDevice->getGmmHelper(), false, 1u);
EXPECT_EQ(RENDER_SURFACE_STATE::COHERENCY_TYPE_GPU_COHERENT, state->getCoherencyType());