mirror of
https://github.com/intel/compute-runtime.git
synced 2026-01-08 22:12:59 +08:00
Add extra parameters to setArgStateful()
Signed-off-by: Igor Venevtsev <igor.venevtsev@intel.com>
This commit is contained in:
committed by
Compute-Runtime-Automation
parent
0871c1bb76
commit
3df6110a17
@@ -99,7 +99,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelWithParams(z
|
||||
NEO::EncodeSurfaceState<GfxFamily>::encodeBuffer(surfaceState, debugSurface->getGpuAddress(),
|
||||
debugSurface->getUnderlyingBufferSize(), mocs,
|
||||
false, false, false, neoDevice->getNumAvailableDevices(),
|
||||
debugSurface, neoDevice->getGmmHelper());
|
||||
debugSurface, neoDevice->getGmmHelper(), kernelImp->getKernelDescriptor().kernelAttributes.flags.useGlobalAtomics, 1u);
|
||||
}
|
||||
|
||||
appendSignalEventPostWalker(hEvent);
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2019-2020 Intel Corporation
|
||||
* Copyright (C) 2019-2021 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
@@ -68,7 +68,8 @@ struct KernelHw : public KernelImp {
|
||||
NEO::Device *neoDevice = module->getDevice()->getNEODevice();
|
||||
NEO::EncodeSurfaceState<GfxFamily>::encodeBuffer(surfaceStateAddress, bufferAddressForSsh, bufferSizeForSsh, mocs,
|
||||
false, false, false, neoDevice->getNumAvailableDevices(),
|
||||
alloc, neoDevice->getGmmHelper());
|
||||
alloc, neoDevice->getGmmHelper(),
|
||||
kernelImmData->getDescriptor().kernelAttributes.flags.useGlobalAtomics, 1u);
|
||||
}
|
||||
|
||||
std::unique_ptr<Kernel> clone() const override {
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2018-2020 Intel Corporation
|
||||
* Copyright (C) 2018-2021 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
@@ -142,7 +142,8 @@ void gtpinNotifyKernelSubmit(cl_kernel kernel, void *pCmdQueue) {
|
||||
void *pSurfaceState = gtpinHelper.getSurfaceState(pKernel, gtpinBTI, rootDeviceIndex);
|
||||
cl_mem buffer = (cl_mem)resource;
|
||||
auto pBuffer = castToObjectOrAbort<Buffer>(buffer);
|
||||
pBuffer->setArgStateful(pSurfaceState, false, false, false, false, device);
|
||||
pBuffer->setArgStateful(pSurfaceState, false, false, false, false, device,
|
||||
pKernel->getDefaultKernelInfo().kernelDescriptor.kernelAttributes.flags.useGlobalAtomics, pContext->getNumDevices());
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -1490,7 +1490,10 @@ cl_int Kernel::setArgBuffer(uint32_t argIndex,
|
||||
|
||||
if (requiresSshForBuffers(rootDeviceIndex)) {
|
||||
auto surfaceState = ptrOffset(getSurfaceStateHeap(rootDeviceIndex), kernelArgInfo.offsetHeap);
|
||||
buffer->setArgStateful(surfaceState, forceNonAuxMode, disableL3, isAuxTranslationKernel, kernelArgInfo.isReadOnly, pClDevice->getDevice());
|
||||
auto context = program->getContextPtr();
|
||||
size_t numDevicesInContext = context ? context->getNumDevices() : 1u;
|
||||
buffer->setArgStateful(surfaceState, forceNonAuxMode, disableL3, isAuxTranslationKernel, kernelArgInfo.isReadOnly, pClDevice->getDevice(),
|
||||
getDefaultKernelInfo().kernelDescriptor.kernelAttributes.flags.useGlobalAtomics, numDevicesInContext);
|
||||
}
|
||||
|
||||
kernelArguments[argIndex].isStatelessUncacheable = kernelArgInfo.pureStatefulBufferAccess ? false : buffer->isMemObjUncacheable();
|
||||
|
||||
@@ -752,7 +752,7 @@ void Buffer::setSurfaceState(const Device *device,
|
||||
multiGraphicsAllocation.addAllocation(gfxAlloc);
|
||||
}
|
||||
auto buffer = Buffer::createBufferHwFromDevice(device, flags, flagsIntel, svmSize, svmPtr, svmPtr, std::move(multiGraphicsAllocation), offset, true, false, false);
|
||||
buffer->setArgStateful(surfaceState, forceNonAuxMode, disableL3, false, false, *device);
|
||||
buffer->setArgStateful(surfaceState, forceNonAuxMode, disableL3, false, false, *device, false, 1u);
|
||||
delete buffer;
|
||||
}
|
||||
|
||||
|
||||
@@ -137,7 +137,8 @@ class Buffer : public MemObj {
|
||||
bool isSubBuffer();
|
||||
bool isValidSubBufferOffset(size_t offset);
|
||||
uint64_t setArgStateless(void *memory, uint32_t patchSize, uint32_t rootDeviceIndex, bool set32BitAddressing);
|
||||
virtual void setArgStateful(void *memory, bool forceNonAuxMode, bool disableL3, bool alignSizeForAuxTranslation, bool isReadOnly, const Device &device) = 0;
|
||||
virtual void setArgStateful(void *memory, bool forceNonAuxMode, bool disableL3, bool alignSizeForAuxTranslation,
|
||||
bool isReadOnly, const Device &device, bool useGlobalAtomics, size_t numDevicesInContext) = 0;
|
||||
bool bufferRectPitchSet(const size_t *bufferOrigin,
|
||||
const size_t *region,
|
||||
size_t &bufferRowPitch,
|
||||
@@ -208,7 +209,8 @@ class BufferHw : public Buffer {
|
||||
: Buffer(context, memoryProperties, flags, flagsIntel, size, memoryStorage, hostPtr, std::move(multiGraphicsAllocation),
|
||||
zeroCopy, isHostPtrSVM, isObjectRedescribed) {}
|
||||
|
||||
void setArgStateful(void *memory, bool forceNonAuxMode, bool disableL3, bool alignSizeForAuxTranslation, bool isReadOnlyArgument, const Device &device) override;
|
||||
void setArgStateful(void *memory, bool forceNonAuxMode, bool disableL3, bool alignSizeForAuxTranslation,
|
||||
bool isReadOnlyArgument, const Device &device, bool useGlobalAtomics, size_t numDevicesInContext) override;
|
||||
void appendSurfaceStateExt(void *memory);
|
||||
|
||||
static Buffer *create(Context *context,
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2019-2020 Intel Corporation
|
||||
* Copyright (C) 2019-2021 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
@@ -34,7 +34,8 @@ union SURFACE_STATE_BUFFER_LENGTH {
|
||||
};
|
||||
|
||||
template <typename GfxFamily>
|
||||
void BufferHw<GfxFamily>::setArgStateful(void *memory, bool forceNonAuxMode, bool disableL3, bool alignSizeForAuxTranslation, bool isReadOnlyArgument, const Device &device) {
|
||||
void BufferHw<GfxFamily>::setArgStateful(void *memory, bool forceNonAuxMode, bool disableL3, bool alignSizeForAuxTranslation,
|
||||
bool isReadOnlyArgument, const Device &device, bool useGlobalAtomics, size_t numDevicesInContext) {
|
||||
auto rootDeviceIndex = device.getRootDeviceIndex();
|
||||
auto graphicsAllocation = multiGraphicsAllocation.getGraphicsAllocation(rootDeviceIndex);
|
||||
const auto isReadOnly = isValueSet(getFlags(), CL_MEM_READ_ONLY) || isReadOnlyArgument;
|
||||
@@ -42,7 +43,7 @@ void BufferHw<GfxFamily>::setArgStateful(void *memory, bool forceNonAuxMode, boo
|
||||
getSurfaceSize(alignSizeForAuxTranslation, rootDeviceIndex),
|
||||
getMocsValue(disableL3, isReadOnly, rootDeviceIndex),
|
||||
true, forceNonAuxMode, isReadOnly, device.getNumAvailableDevices(),
|
||||
graphicsAllocation, device.getGmmHelper());
|
||||
graphicsAllocation, device.getGmmHelper(), useGlobalAtomics, numDevicesInContext);
|
||||
appendSurfaceStateExt(memory);
|
||||
}
|
||||
} // namespace NEO
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2019-2020 Intel Corporation
|
||||
* Copyright (C) 2019-2021 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
@@ -38,7 +38,7 @@ GEN12LPTEST_F(BufferTestsTgllp, givenBufferNotReadonlyWhenProgrammingSurfaceStat
|
||||
ASSERT_EQ(CL_SUCCESS, retVal);
|
||||
|
||||
typename FamilyType::RENDER_SURFACE_STATE surfaceState = {};
|
||||
buffer->setArgStateful(&surfaceState, false, false, false, false, device->getDevice());
|
||||
buffer->setArgStateful(&surfaceState, false, false, false, false, device->getDevice(), false, 1u);
|
||||
|
||||
const auto expectedMocs = device->getGmmHelper()->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER);
|
||||
const auto actualMocs = surfaceState.getMemoryObjectControlState();
|
||||
@@ -55,7 +55,7 @@ GEN12LPTEST_F(BufferTestsTgllp, givenBufferReadonlyWhenProgrammingSurfaceStateTh
|
||||
ASSERT_EQ(CL_SUCCESS, retVal);
|
||||
|
||||
typename FamilyType::RENDER_SURFACE_STATE surfaceState = {};
|
||||
buffer->setArgStateful(&surfaceState, false, false, false, true, context->getDevice(0)->getDevice());
|
||||
buffer->setArgStateful(&surfaceState, false, false, false, true, context->getDevice(0)->getDevice(), false, 1u);
|
||||
|
||||
const auto expectedMocs = device->getGmmHelper()->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER);
|
||||
const auto actualMocs = surfaceState.getMemoryObjectControlState();
|
||||
@@ -73,7 +73,7 @@ GEN12LPTEST_F(BufferTestsTgllp, givenConstantSurfaceWhenProgrammingSurfaceStateT
|
||||
buffer->getGraphicsAllocation(0)->setAllocationType(GraphicsAllocation::AllocationType::CONSTANT_SURFACE);
|
||||
|
||||
typename FamilyType::RENDER_SURFACE_STATE surfaceState = {};
|
||||
buffer->setArgStateful(&surfaceState, false, false, false, false, context->getDevice(0)->getDevice());
|
||||
buffer->setArgStateful(&surfaceState, false, false, false, false, context->getDevice(0)->getDevice(), false, 1u);
|
||||
|
||||
const auto expectedMocs = device->getGmmHelper()->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER);
|
||||
const auto actualMocs = surfaceState.getMemoryObjectControlState();
|
||||
@@ -93,7 +93,7 @@ GEN12LPTEST_F(BufferTestsTgllp, givenL1ForceEnabledWhenProgrammingSurfaceStateTh
|
||||
ASSERT_EQ(CL_SUCCESS, retVal);
|
||||
|
||||
typename FamilyType::RENDER_SURFACE_STATE surfaceState = {};
|
||||
buffer->setArgStateful(&surfaceState, false, false, false, false, device->getDevice());
|
||||
buffer->setArgStateful(&surfaceState, false, false, false, false, device->getDevice(), false, 1u);
|
||||
|
||||
const auto expectedMocs = device->getGmmHelper()->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER);
|
||||
const auto actualMocs = surfaceState.getMemoryObjectControlState();
|
||||
@@ -113,7 +113,7 @@ GEN12LPTEST_F(BufferTestsTgllp, givenBufferReadonlyAndL1ForceEnabledWhenProgramm
|
||||
ASSERT_EQ(CL_SUCCESS, retVal);
|
||||
|
||||
typename FamilyType::RENDER_SURFACE_STATE surfaceState = {};
|
||||
buffer->setArgStateful(&surfaceState, false, false, false, false, device->getDevice());
|
||||
buffer->setArgStateful(&surfaceState, false, false, false, false, device->getDevice(), false, 1u);
|
||||
|
||||
const auto expectedMocs = device->getGmmHelper()->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER_CONST);
|
||||
const auto actualMocs = surfaceState.getMemoryObjectControlState();
|
||||
@@ -133,7 +133,7 @@ GEN12LPTEST_F(BufferTestsTgllp, givenBufferReadonlyL1ForceDisabledWhenProgrammin
|
||||
ASSERT_EQ(CL_SUCCESS, retVal);
|
||||
|
||||
typename FamilyType::RENDER_SURFACE_STATE surfaceState = {};
|
||||
buffer->setArgStateful(&surfaceState, false, false, false, true, device->getDevice());
|
||||
buffer->setArgStateful(&surfaceState, false, false, false, true, device->getDevice(), false, 1u);
|
||||
|
||||
const auto expectedMocs = device->getGmmHelper()->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER);
|
||||
const auto actualMocs = surfaceState.getMemoryObjectControlState();
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2019-2020 Intel Corporation
|
||||
* Copyright (C) 2019-2021 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
@@ -38,7 +38,7 @@ GEN12LPTEST_F(BufferTestsTgllp, givenBufferNotReadonlyWhenProgrammingSurfaceStat
|
||||
ASSERT_EQ(CL_SUCCESS, retVal);
|
||||
|
||||
typename FamilyType::RENDER_SURFACE_STATE surfaceState = {};
|
||||
buffer->setArgStateful(&surfaceState, false, false, false, false, device->getDevice());
|
||||
buffer->setArgStateful(&surfaceState, false, false, false, false, device->getDevice(), false, 1u);
|
||||
|
||||
const auto expectedMocs = device->getGmmHelper()->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER);
|
||||
const auto actualMocs = surfaceState.getMemoryObjectControlState();
|
||||
@@ -55,7 +55,7 @@ GEN12LPTEST_F(BufferTestsTgllp, givenBufferReadonlyWhenProgrammingSurfaceStateTh
|
||||
ASSERT_EQ(CL_SUCCESS, retVal);
|
||||
|
||||
typename FamilyType::RENDER_SURFACE_STATE surfaceState = {};
|
||||
buffer->setArgStateful(&surfaceState, false, false, false, true, context->getDevice(0)->getDevice());
|
||||
buffer->setArgStateful(&surfaceState, false, false, false, true, context->getDevice(0)->getDevice(), false, 1u);
|
||||
|
||||
const auto expectedMocs = device->getGmmHelper()->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER_CONST);
|
||||
const auto actualMocs = surfaceState.getMemoryObjectControlState();
|
||||
@@ -73,7 +73,7 @@ GEN12LPTEST_F(BufferTestsTgllp, givenConstantSurfaceWhenProgrammingSurfaceStateT
|
||||
buffer->getGraphicsAllocation(0)->setAllocationType(GraphicsAllocation::AllocationType::CONSTANT_SURFACE);
|
||||
|
||||
typename FamilyType::RENDER_SURFACE_STATE surfaceState = {};
|
||||
buffer->setArgStateful(&surfaceState, false, false, false, false, context->getDevice(0)->getDevice());
|
||||
buffer->setArgStateful(&surfaceState, false, false, false, false, context->getDevice(0)->getDevice(), false, 1u);
|
||||
|
||||
const auto expectedMocs = device->getGmmHelper()->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER_CONST);
|
||||
const auto actualMocs = surfaceState.getMemoryObjectControlState();
|
||||
@@ -93,7 +93,7 @@ GEN12LPTEST_F(BufferTestsTgllp, givenL1ForceEnabledWhenProgrammingSurfaceStateTh
|
||||
ASSERT_EQ(CL_SUCCESS, retVal);
|
||||
|
||||
typename FamilyType::RENDER_SURFACE_STATE surfaceState = {};
|
||||
buffer->setArgStateful(&surfaceState, false, false, false, false, device->getDevice());
|
||||
buffer->setArgStateful(&surfaceState, false, false, false, false, device->getDevice(), false, 1u);
|
||||
|
||||
const auto expectedMocs = device->getGmmHelper()->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER_CONST);
|
||||
const auto actualMocs = surfaceState.getMemoryObjectControlState();
|
||||
@@ -113,7 +113,7 @@ GEN12LPTEST_F(BufferTestsTgllp, givenBufferReadonlyL1ForceDisabledWhenProgrammin
|
||||
ASSERT_EQ(CL_SUCCESS, retVal);
|
||||
|
||||
typename FamilyType::RENDER_SURFACE_STATE surfaceState = {};
|
||||
buffer->setArgStateful(&surfaceState, false, false, false, true, device->getDevice());
|
||||
buffer->setArgStateful(&surfaceState, false, false, false, true, device->getDevice(), false, 1u);
|
||||
|
||||
const auto expectedMocs = device->getGmmHelper()->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER);
|
||||
const auto actualMocs = surfaceState.getMemoryObjectControlState();
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2017-2020 Intel Corporation
|
||||
* Copyright (C) 2017-2021 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
@@ -80,7 +80,7 @@ class MockObject : public MockObjectBase<BaseType> {};
|
||||
template <>
|
||||
class MockObject<Buffer> : public MockObjectBase<Buffer> {
|
||||
public:
|
||||
void setArgStateful(void *memory, bool forceNonAuxMode, bool disableL3, bool alignSizeForAuxTranslation, bool isReadOnly, const Device &device) override {}
|
||||
void setArgStateful(void *memory, bool forceNonAuxMode, bool disableL3, bool alignSizeForAuxTranslation, bool isReadOnly, const Device &device, bool useGlobalAtomics, size_t numDevicesInContext) override {}
|
||||
};
|
||||
|
||||
template <>
|
||||
@@ -295,7 +295,7 @@ class MockBuffer : public MockBufferStorage, public Buffer {
|
||||
CL_MEM_USE_HOST_PTR, 0, sizeof(data), &data, &data, GraphicsAllocationHelper::toMultiGraphicsAllocation(&mockGfxAllocation), true, false, false) {
|
||||
}
|
||||
|
||||
void setArgStateful(void *memory, bool forceNonAuxMode, bool disableL3, bool alignSizeForAuxTranslation, bool isReadOnly, const Device &device) override {
|
||||
void setArgStateful(void *memory, bool forceNonAuxMode, bool disableL3, bool alignSizeForAuxTranslation, bool isReadOnly, const Device &device, bool useGlobalAtomics, size_t numDevicesInContext) override {
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
@@ -1382,7 +1382,7 @@ HWTEST_F(BufferSetSurfaceTests, givenBufferSetSurfaceThatAddressIsForcedTo32bitW
|
||||
using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE;
|
||||
RENDER_SURFACE_STATE surfaceState = {};
|
||||
|
||||
buffer->setArgStateful(&surfaceState, false, false, false, false, context.getDevice(0)->getDevice());
|
||||
buffer->setArgStateful(&surfaceState, false, false, false, false, context.getDevice(0)->getDevice(), false, 1u);
|
||||
|
||||
auto surfBaseAddress = surfaceState.getSurfaceBaseAddress();
|
||||
auto bufferAddress = buffer->getGraphicsAllocation(rootDeviceIndex)->getGpuAddress();
|
||||
@@ -1418,7 +1418,7 @@ HWTEST_F(BufferSetSurfaceTests, givenBufferWithOffsetWhenSetArgStatefulIsCalledT
|
||||
using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE;
|
||||
RENDER_SURFACE_STATE surfaceState = {};
|
||||
|
||||
subBuffer->setArgStateful(&surfaceState, false, false, false, false, context.getDevice(0)->getDevice());
|
||||
subBuffer->setArgStateful(&surfaceState, false, false, false, false, context.getDevice(0)->getDevice(), false, 1u);
|
||||
|
||||
auto surfBaseAddress = surfaceState.getSurfaceBaseAddress();
|
||||
auto bufferAddress = buffer->getGraphicsAllocation(rootDeviceIndex)->getGpuAddress();
|
||||
@@ -1447,7 +1447,7 @@ HWTEST_F(BufferSetSurfaceTests, givenBufferWhenSetArgStatefulWithL3ChacheDisable
|
||||
using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE;
|
||||
RENDER_SURFACE_STATE surfaceState = {};
|
||||
|
||||
buffer->setArgStateful(&surfaceState, false, true, true, false, context.getDevice(0)->getDevice());
|
||||
buffer->setArgStateful(&surfaceState, false, true, true, false, context.getDevice(0)->getDevice(), false, 1u);
|
||||
|
||||
auto mocs = surfaceState.getMemoryObjectControlState();
|
||||
auto gmmHelper = device->getGmmHelper();
|
||||
@@ -1475,7 +1475,7 @@ HWTEST_F(BufferSetSurfaceTests, givenBufferThatIsMisalignedButIsAReadOnlyArgumen
|
||||
|
||||
buffer->getGraphicsAllocation(rootDeviceIndex)->setSize(127);
|
||||
|
||||
buffer->setArgStateful(&surfaceState, false, false, false, true, context.getDevice(0)->getDevice());
|
||||
buffer->setArgStateful(&surfaceState, false, false, false, true, context.getDevice(0)->getDevice(), false, 1u);
|
||||
|
||||
auto mocs = surfaceState.getMemoryObjectControlState();
|
||||
auto gmmHelper = device->getGmmHelper();
|
||||
@@ -1500,7 +1500,7 @@ HWTEST_F(BufferSetSurfaceTests, givenAlignedCacheableReadOnlyBufferThenChoseOclB
|
||||
EXPECT_EQ(CL_SUCCESS, retVal);
|
||||
|
||||
typename FamilyType::RENDER_SURFACE_STATE surfaceState = {};
|
||||
buffer->setArgStateful(&surfaceState, false, false, false, false, context.getDevice(0)->getDevice());
|
||||
buffer->setArgStateful(&surfaceState, false, false, false, false, context.getDevice(0)->getDevice(), false, 1u);
|
||||
|
||||
const auto expectedMocs = device->getGmmHelper()->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER);
|
||||
const auto actualMocs = surfaceState.getMemoryObjectControlState();
|
||||
@@ -1525,7 +1525,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, BufferSetSurfaceTests, givenAlignedCacheableNonReadO
|
||||
EXPECT_EQ(CL_SUCCESS, retVal);
|
||||
|
||||
typename FamilyType::RENDER_SURFACE_STATE surfaceState = {};
|
||||
buffer->setArgStateful(&surfaceState, false, false, false, false, context.getDevice(0)->getDevice());
|
||||
buffer->setArgStateful(&surfaceState, false, false, false, false, context.getDevice(0)->getDevice(), false, 1u);
|
||||
|
||||
const auto expectedMocs = device->getGmmHelper()->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER);
|
||||
const auto actualMocs = surfaceState.getMemoryObjectControlState();
|
||||
@@ -1550,7 +1550,7 @@ HWTEST_F(BufferSetSurfaceTests, givenRenderCompressedGmmResourceWhenSurfaceState
|
||||
graphicsAllocation->setDefaultGmm(gmm);
|
||||
gmm->isRenderCompressed = true;
|
||||
|
||||
buffer->setArgStateful(&surfaceState, false, false, false, false, context.getDevice(0)->getDevice());
|
||||
buffer->setArgStateful(&surfaceState, false, false, false, false, context.getDevice(0)->getDevice(), false, 1u);
|
||||
|
||||
EXPECT_EQ(0u, surfaceState.getAuxiliarySurfaceBaseAddress());
|
||||
EXPECT_TRUE(EncodeSurfaceState<FamilyType>::isAuxModeEnabled(&surfaceState, gmm));
|
||||
@@ -1571,7 +1571,7 @@ HWTEST_F(BufferSetSurfaceTests, givenNonRenderCompressedGmmResourceWhenSurfaceSt
|
||||
buffer->getGraphicsAllocation(rootDeviceIndex)->setDefaultGmm(gmm);
|
||||
gmm->isRenderCompressed = false;
|
||||
|
||||
buffer->setArgStateful(&surfaceState, false, false, false, false, context.getDevice(0)->getDevice());
|
||||
buffer->setArgStateful(&surfaceState, false, false, false, false, context.getDevice(0)->getDevice(), false, 1u);
|
||||
|
||||
EXPECT_EQ(0u, surfaceState.getAuxiliarySurfaceBaseAddress());
|
||||
EXPECT_TRUE(AUXILIARY_SURFACE_MODE::AUXILIARY_SURFACE_MODE_AUX_NONE == surfaceState.getAuxiliarySurfaceMode());
|
||||
|
||||
@@ -58,7 +58,7 @@ class MockBuffer : public MockBufferStorage, public Buffer {
|
||||
this->multiGraphicsAllocation.addAllocation(&this->mockGfxAllocation);
|
||||
}
|
||||
}
|
||||
void setArgStateful(void *memory, bool forceNonAuxMode, bool disableL3, bool alignSizeForAuxTranslation, bool isReadOnly, const Device &device) override {
|
||||
void setArgStateful(void *memory, bool forceNonAuxMode, bool disableL3, bool alignSizeForAuxTranslation, bool isReadOnly, const Device &device, bool useGlobalAtomics, size_t numDevicesInContext) override {
|
||||
Buffer::setSurfaceState(this->device.get(), memory, forceNonAuxMode, disableL3, getSize(), getCpuAddress(), 0, (externalAlloc != nullptr) ? externalAlloc : &mockGfxAllocation, 0, 0);
|
||||
}
|
||||
GraphicsAllocation *externalAlloc = nullptr;
|
||||
@@ -79,7 +79,7 @@ class AlignedBuffer : public MockBufferStorage, public Buffer {
|
||||
CL_MEM_USE_HOST_PTR, 0, sizeof(data) / 2, alignUp(&data, 64), alignUp(&data, 64),
|
||||
GraphicsAllocationHelper::toMultiGraphicsAllocation(gfxAllocation), true, false, false) {
|
||||
}
|
||||
void setArgStateful(void *memory, bool forceNonAuxMode, bool disableL3, bool alignSizeForAuxTranslation, bool isReadOnly, const Device &device) override {
|
||||
void setArgStateful(void *memory, bool forceNonAuxMode, bool disableL3, bool alignSizeForAuxTranslation, bool isReadOnly, const Device &device, bool useGlobalAtomics, size_t numDevicesInContext) override {
|
||||
Buffer::setSurfaceState(this->device.get(), memory, forceNonAuxMode, disableL3, getSize(), getCpuAddress(), 0, &mockGfxAllocation, 0, 0);
|
||||
}
|
||||
};
|
||||
@@ -99,7 +99,7 @@ class UnalignedBuffer : public MockBufferStorage, public Buffer {
|
||||
CL_MEM_USE_HOST_PTR, 0, sizeof(data) / 2, alignUp(&data, 4), alignUp(&data, 4),
|
||||
GraphicsAllocationHelper::toMultiGraphicsAllocation(gfxAllocation), false, false, false) {
|
||||
}
|
||||
void setArgStateful(void *memory, bool forceNonAuxMode, bool disableL3, bool alignSizeForAuxTranslation, bool isReadOnly, const Device &device) override {
|
||||
void setArgStateful(void *memory, bool forceNonAuxMode, bool disableL3, bool alignSizeForAuxTranslation, bool isReadOnly, const Device &device, bool useGlobalAtomics, size_t numDevicesInContext) override {
|
||||
Buffer::setSurfaceState(this->device.get(), memory, forceNonAuxMode, disableL3, getSize(), getCpuAddress(), 0, &mockGfxAllocation, 0, 0);
|
||||
}
|
||||
};
|
||||
|
||||
@@ -1489,7 +1489,7 @@ class DrmMockBuffer : public MockBufferStorage, public Buffer {
|
||||
gfxAllocation(alloc) {
|
||||
}
|
||||
|
||||
void setArgStateful(void *memory, bool forceNonAuxMode, bool disableL3, bool alignSizeForAuxTranslation, bool isReadOnly, const Device &device) override {
|
||||
void setArgStateful(void *memory, bool forceNonAuxMode, bool disableL3, bool alignSizeForAuxTranslation, bool isReadOnly, const Device &device, bool useGlobalAtomics, size_t numDevicesInContext) override {
|
||||
}
|
||||
|
||||
protected:
|
||||
|
||||
@@ -230,9 +230,9 @@ struct EncodeSurfaceState {
|
||||
|
||||
static void encodeBuffer(void *dst, uint64_t address, size_t size, uint32_t mocs,
|
||||
bool cpuCoherent, bool forceNonAuxMode, bool isReadOnly, uint32_t numAvailableDevices,
|
||||
GraphicsAllocation *allocation, GmmHelper *gmmHelper);
|
||||
GraphicsAllocation *allocation, GmmHelper *gmmHelper, bool useGlobalAtomics, size_t numDevicesInContext);
|
||||
static void encodeExtraBufferParams(R_SURFACE_STATE *surfaceState, GraphicsAllocation *allocation, GmmHelper *gmmHelper,
|
||||
bool isReadOnly, uint32_t numAvailableDevices);
|
||||
bool isReadOnly, uint32_t numAvailableDevices, bool useGlobalAtomics, size_t numDevicesInContext);
|
||||
static void encodeExtraCacheSettings(R_SURFACE_STATE *surfaceState, const HardwareInfo &hwInfo);
|
||||
|
||||
static constexpr uintptr_t getSurfaceBaseAddressAlignmentMask() {
|
||||
|
||||
@@ -318,7 +318,7 @@ void EncodeStoreMMIO<Family>::encode(LinearStream &csr, uint32_t offset, uint64_
|
||||
template <typename Family>
|
||||
void EncodeSurfaceState<Family>::encodeBuffer(void *dst, uint64_t address, size_t size, uint32_t mocs,
|
||||
bool cpuCoherent, bool forceNonAuxMode, bool isReadOnly, uint32_t numAvailableDevices,
|
||||
GraphicsAllocation *allocation, GmmHelper *gmmHelper) {
|
||||
GraphicsAllocation *allocation, GmmHelper *gmmHelper, bool useGlobalAtomics, size_t numDevicesInContext) {
|
||||
auto surfaceState = reinterpret_cast<R_SURFACE_STATE *>(dst);
|
||||
UNRECOVERABLE_IF(!isAligned<getSurfaceBaseAddressMinimumAlignment()>(size));
|
||||
|
||||
@@ -356,7 +356,7 @@ void EncodeSurfaceState<Family>::encodeBuffer(void *dst, uint64_t address, size_
|
||||
surfaceState->setMemoryObjectControlState(gmmHelper->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER_CACHELINE_MISALIGNED));
|
||||
}
|
||||
|
||||
EncodeSurfaceState<Family>::encodeExtraBufferParams(surfaceState, allocation, gmmHelper, isReadOnly, numAvailableDevices);
|
||||
EncodeSurfaceState<Family>::encodeExtraBufferParams(surfaceState, allocation, gmmHelper, isReadOnly, numAvailableDevices, useGlobalAtomics, numDevicesInContext);
|
||||
}
|
||||
|
||||
template <typename Family>
|
||||
|
||||
@@ -400,7 +400,7 @@ inline size_t EncodeWA<GfxFamily>::getAdditionalPipelineSelectSize(Device &devic
|
||||
|
||||
template <typename GfxFamily>
|
||||
void EncodeSurfaceState<GfxFamily>::encodeExtraBufferParams(R_SURFACE_STATE *surfaceState, GraphicsAllocation *allocation, GmmHelper *gmmHelper,
|
||||
bool isReadOnly, uint32_t numAvailableDevices) {
|
||||
bool isReadOnly, uint32_t numAvailableDevices, bool useGlobalAtomics, size_t numDevicesInContext) {
|
||||
encodeExtraCacheSettings(surfaceState, *gmmHelper->getHardwareInfo());
|
||||
}
|
||||
|
||||
|
||||
@@ -51,39 +51,39 @@ constexpr uint32_t NotApplicable = 3u;
|
||||
|
||||
struct DispatchFlags {
|
||||
DispatchFlags() = delete;
|
||||
DispatchFlags(CsrDependencies csrDependencies, TimestampPacketContainer *barrierTimestampPacketNodes, PipelineSelectArgs pipelineSelectArgs,
|
||||
FlushStampTrackingObj *flushStampReference, QueueThrottle throttle, PreemptionMode preemptionMode, uint32_t numGrfRequired,
|
||||
uint32_t l3CacheSettings, uint32_t threadArbitrationPolicy, uint32_t additionalKernelExecInfo,
|
||||
KernelExecutionType kernelExecutionType, MemoryCompressionState memoryCompressionState,
|
||||
uint64_t sliceCount, bool blocking, bool dcFlush, bool useSLM, bool guardCommandBufferWithPipeControl, bool gsba32BitRequired,
|
||||
bool requiresCoherency, bool lowPriority, bool implicitFlush, bool outOfOrderExecutionAllowed, bool epilogueRequired,
|
||||
bool usePerDSSbackedBuffer, bool useSingleSubdevice, bool useGlobalAtomics, size_t numDevicesInContext) : csrDependencies(csrDependencies),
|
||||
barrierTimestampPacketNodes(barrierTimestampPacketNodes),
|
||||
pipelineSelectArgs(pipelineSelectArgs),
|
||||
flushStampReference(flushStampReference),
|
||||
throttle(throttle),
|
||||
preemptionMode(preemptionMode),
|
||||
numGrfRequired(numGrfRequired),
|
||||
l3CacheSettings(l3CacheSettings),
|
||||
threadArbitrationPolicy(threadArbitrationPolicy),
|
||||
additionalKernelExecInfo(additionalKernelExecInfo),
|
||||
kernelExecutionType(kernelExecutionType),
|
||||
memoryCompressionState(memoryCompressionState),
|
||||
sliceCount(sliceCount),
|
||||
blocking(blocking),
|
||||
dcFlush(dcFlush),
|
||||
useSLM(useSLM),
|
||||
guardCommandBufferWithPipeControl(guardCommandBufferWithPipeControl),
|
||||
gsba32BitRequired(gsba32BitRequired),
|
||||
requiresCoherency(requiresCoherency),
|
||||
lowPriority(lowPriority),
|
||||
implicitFlush(implicitFlush),
|
||||
outOfOrderExecutionAllowed(outOfOrderExecutionAllowed),
|
||||
epilogueRequired(epilogueRequired),
|
||||
usePerDssBackedBuffer(usePerDSSbackedBuffer),
|
||||
useSingleSubdevice(useSingleSubdevice),
|
||||
useGlobalAtomics(useGlobalAtomics),
|
||||
numDevicesInContext(numDevicesInContext){};
|
||||
DispatchFlags(CsrDependencies csrDependenciesP, TimestampPacketContainer *barrierTimestampPacketNodesP, PipelineSelectArgs pipelineSelectArgsP,
|
||||
FlushStampTrackingObj *flushStampReferenceP, QueueThrottle throttleP, PreemptionMode preemptionModeP, uint32_t numGrfRequiredP,
|
||||
uint32_t l3CacheSettingsP, uint32_t threadArbitrationPolicyP, uint32_t additionalKernelExecInfoP,
|
||||
KernelExecutionType kernelExecutionTypeP, MemoryCompressionState memoryCompressionStateP,
|
||||
uint64_t sliceCountP, bool blockingP, bool dcFlushP, bool useSLMP, bool guardCommandBufferWithPipeControlP, bool gsba32BitRequiredP,
|
||||
bool requiresCoherencyP, bool lowPriorityP, bool implicitFlushP, bool outOfOrderExecutionAllowedP, bool epilogueRequiredP,
|
||||
bool usePerDSSbackedBufferP, bool useSingleSubdeviceP, bool useGlobalAtomicsP, size_t numDevicesInContextP) : csrDependencies(csrDependenciesP),
|
||||
barrierTimestampPacketNodes(barrierTimestampPacketNodesP),
|
||||
pipelineSelectArgs(pipelineSelectArgsP),
|
||||
flushStampReference(flushStampReferenceP),
|
||||
throttle(throttleP),
|
||||
preemptionMode(preemptionModeP),
|
||||
numGrfRequired(numGrfRequiredP),
|
||||
l3CacheSettings(l3CacheSettingsP),
|
||||
threadArbitrationPolicy(threadArbitrationPolicyP),
|
||||
additionalKernelExecInfo(additionalKernelExecInfoP),
|
||||
kernelExecutionType(kernelExecutionTypeP),
|
||||
memoryCompressionState(memoryCompressionStateP),
|
||||
sliceCount(sliceCountP),
|
||||
blocking(blockingP),
|
||||
dcFlush(dcFlushP),
|
||||
useSLM(useSLMP),
|
||||
guardCommandBufferWithPipeControl(guardCommandBufferWithPipeControlP),
|
||||
gsba32BitRequired(gsba32BitRequiredP),
|
||||
requiresCoherency(requiresCoherencyP),
|
||||
lowPriority(lowPriorityP),
|
||||
implicitFlush(implicitFlushP),
|
||||
outOfOrderExecutionAllowed(outOfOrderExecutionAllowedP),
|
||||
epilogueRequired(epilogueRequiredP),
|
||||
usePerDssBackedBuffer(usePerDSSbackedBufferP),
|
||||
useSingleSubdevice(useSingleSubdeviceP),
|
||||
useGlobalAtomics(useGlobalAtomicsP),
|
||||
numDevicesInContext(numDevicesInContextP){};
|
||||
|
||||
CsrDependencies csrDependencies;
|
||||
TimestampPacketContainer *barrierTimestampPacketNodes = nullptr;
|
||||
|
||||
@@ -52,7 +52,7 @@ void EncodeWA<Family>::encodeAdditionalPipelineSelect(Device &device, LinearStre
|
||||
|
||||
template <>
|
||||
void EncodeSurfaceState<Family>::encodeExtraBufferParams(R_SURFACE_STATE *surfaceState, GraphicsAllocation *allocation, GmmHelper *gmmHelper,
|
||||
bool isReadOnly, uint32_t numAvailableDevices) {
|
||||
bool isReadOnly, uint32_t numAvailableDevices, bool useGlobalAtomics, size_t numDevicesInContext) {
|
||||
const bool isL3Allowed = surfaceState->getMemoryObjectControlState() == gmmHelper->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER);
|
||||
if (isL3Allowed) {
|
||||
const bool isConstantSurface = allocation && allocation->getAllocationType() == GraphicsAllocation::AllocationType::CONSTANT_SURFACE;
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2020 Intel Corporation
|
||||
* Copyright (C) 2020-2021 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
@@ -155,7 +155,7 @@ HWTEST_F(CommandEncodeStatesTest, givenCreatedSurfaceStateBufferWhenAllocationPr
|
||||
GraphicsAllocation allocation(0, GraphicsAllocation::AllocationType::UNKNOWN, cpuAddr, gpuAddr, 0u, allocSize, MemoryPool::MemoryNull, 1);
|
||||
EncodeSurfaceState<FamilyType>::encodeBuffer(stateBuffer, gpuAddr, allocSize, 1,
|
||||
false, false, false, 1u,
|
||||
&allocation, pDevice->getGmmHelper());
|
||||
&allocation, pDevice->getGmmHelper(), false, 1u);
|
||||
EXPECT_EQ(length.SurfaceState.Depth + 1u, state->getDepth());
|
||||
EXPECT_EQ(length.SurfaceState.Width + 1u, state->getWidth());
|
||||
EXPECT_EQ(length.SurfaceState.Height + 1u, state->getHeight());
|
||||
@@ -182,7 +182,7 @@ HWTEST_F(CommandEncodeStatesTest, givenCreatedSurfaceStateBufferWhenAllocationNo
|
||||
|
||||
EncodeSurfaceState<FamilyType>::encodeBuffer(stateBuffer, gpuAddr, allocSize, 1,
|
||||
true, false, false, 1u,
|
||||
nullptr, pDevice->getGmmHelper());
|
||||
nullptr, pDevice->getGmmHelper(), false, 1u);
|
||||
|
||||
EXPECT_EQ(RENDER_SURFACE_STATE::SURFACE_TYPE_SURFTYPE_NULL, state->getSurfaceType());
|
||||
EXPECT_EQ(RENDER_SURFACE_STATE::COHERENCY_TYPE_IA_COHERENT, state->getCoherencyType());
|
||||
@@ -208,7 +208,7 @@ HWTEST_F(CommandEncodeStatesTest, givenCreatedSurfaceStateBufferWhenGpuCoherency
|
||||
|
||||
EncodeSurfaceState<FamilyType>::encodeBuffer(stateBuffer, gpuAddr, allocSize, 1,
|
||||
false, false, false, 1u,
|
||||
nullptr, pDevice->getGmmHelper());
|
||||
nullptr, pDevice->getGmmHelper(), false, 1u);
|
||||
|
||||
EXPECT_EQ(RENDER_SURFACE_STATE::COHERENCY_TYPE_GPU_COHERENT, state->getCoherencyType());
|
||||
|
||||
|
||||
Reference in New Issue
Block a user