Add extra parameters to Buffer::setSurfaceState() method

Signed-off-by: Igor Venevtsev <igor.venevtsev@intel.com>
This commit is contained in:
Igor Venevtsev
2021-03-03 12:25:26 +00:00
committed by Compute-Runtime-Automation
parent 9114a3e414
commit 52e118fe49
20 changed files with 92 additions and 50 deletions

View File

@@ -88,7 +88,7 @@ inline void patchWithImplicitSurface(ArrayRef<uint8_t> crossThreadData, ArrayRef
void *addressToPatch = reinterpret_cast<void *>(allocation.getUnderlyingBuffer());
size_t sizeToPatch = allocation.getUnderlyingBufferSize();
NEO::Buffer::setSurfaceState(&device, surfaceState, false, false, sizeToPatch, addressToPatch, 0,
&allocation, 0, 0);
&allocation, 0, 0, false, 1u);
}
}

View File

@@ -540,7 +540,10 @@ bool CommandQueue::setupDebugSurface(Kernel *kernel) {
kernel->getKernelInfo(rootDeviceIndex).kernelDescriptor.payloadMappings.implicitArgs.systemThreadSurfaceAddress.bindful);
void *addressToPatch = reinterpret_cast<void *>(debugSurface->getGpuAddress());
size_t sizeToPatch = debugSurface->getUnderlyingBufferSize();
Buffer::setSurfaceState(&device->getDevice(), surfaceState, false, false, sizeToPatch, addressToPatch, 0, debugSurface, 0, 0);
Buffer::setSurfaceState(&device->getDevice(), surfaceState, false, false, sizeToPatch,
addressToPatch, 0, debugSurface, 0, 0,
kernel->getDefaultKernelInfo().kernelDescriptor.kernelAttributes.flags.useGlobalAtomics,
kernel->getTotalNumDevicesInContext());
return true;
}

View File

@@ -710,6 +710,7 @@ CompletionStamp CommandQueueHw<GfxFamily>::enqueueNonBlocked(
Kernel *kernel = nullptr;
bool usePerDssBackedBuffer = false;
bool auxTranslationRequired = false;
bool useGlobalAtomics = false;
for (auto &dispatchInfo : multiDispatchInfo) {
if (kernel != dispatchInfo.getKernel()) {
@@ -731,6 +732,10 @@ CompletionStamp CommandQueueHw<GfxFamily>::enqueueNonBlocked(
if (kernel->requiresPerDssBackedBuffer(rootDeviceIndex)) {
usePerDssBackedBuffer = true;
}
if (kernel->getDefaultKernelInfo().kernelDescriptor.kernelAttributes.flags.useGlobalAtomics) {
useGlobalAtomics = true;
}
}
if (mediaSamplerRequired) {
@@ -776,9 +781,6 @@ CompletionStamp CommandQueueHw<GfxFamily>::enqueueNonBlocked(
auto memoryCompressionState = getGpgpuCommandStreamReceiver().getMemoryCompressionState(auxTranslationRequired);
auto context = kernel->getProgram()->getContextPtr();
auto numDevicesInContext = context ? context->getNumDevices() : 1u;
DispatchFlags dispatchFlags(
{}, //csrDependencies
&timestampPacketDependencies.barrierNodes, //barrierTimestampPacketNodes
@@ -805,8 +807,8 @@ CompletionStamp CommandQueueHw<GfxFamily>::enqueueNonBlocked(
false, //epilogueRequired
usePerDssBackedBuffer, //usePerDssBackedBuffer
kernel->isSingleSubdevicePreferred(), //useSingleSubdevice
kernel->getDefaultKernelInfo().kernelDescriptor.kernelAttributes.flags.useGlobalAtomics, //useGlobalAtomics
numDevicesInContext //numDevicesInContext
useGlobalAtomics, //useGlobalAtomics
kernel->getTotalNumDevicesInContext() //numDevicesInContext
);
dispatchFlags.pipelineSelectArgs.mediaSamplerRequired = mediaSamplerRequired;

View File

@@ -101,7 +101,9 @@ void HardwareInterface<GfxFamily>::dispatchWalker(
void *addressToPatch = reinterpret_cast<void *>(debugSurface->getGpuAddress());
size_t sizeToPatch = debugSurface->getUnderlyingBufferSize();
Buffer::setSurfaceState(&commandQueue.getDevice(), commandQueue.getDevice().getDebugger()->getDebugSurfaceReservedSurfaceState(*ssh),
false, false, sizeToPatch, addressToPatch, 0, debugSurface, 0, 0);
false, false, sizeToPatch, addressToPatch, 0, debugSurface, 0, 0,
mainKernel->getDefaultKernelInfo().kernelDescriptor.kernelAttributes.flags.useGlobalAtomics,
mainKernel->getTotalNumDevicesInContext());
}
auto numSupportedDevices = commandQueue.getGpgpuCommandStreamReceiver().getOsContext().getNumSupportedDevices();

View File

@@ -217,9 +217,6 @@ CompletionStamp &CommandComputeKernel::submit(uint32_t taskLevel, bool terminate
auto memoryCompressionState = commandStreamReceiver.getMemoryCompressionState(kernel->isAuxTranslationRequired());
auto context = kernel->getProgram()->getContextPtr();
auto numDevicesInContext = context ? context->getNumDevices() : 1u;
DispatchFlags dispatchFlags(
{}, //csrDependencies
nullptr, //barrierTimestampPacketNodes
@@ -247,7 +244,7 @@ CompletionStamp &CommandComputeKernel::submit(uint32_t taskLevel, bool terminate
kernel->requiresPerDssBackedBuffer(rootDeviceIndex), //usePerDssBackedBuffer
kernel->isSingleSubdevicePreferred(), //useSingleSubdevice
kernel->getDefaultKernelInfo().kernelDescriptor.kernelAttributes.flags.useGlobalAtomics, //useGlobalAtomics
numDevicesInContext); //numDevicesInContext
kernel->getTotalNumDevicesInContext()); //numDevicesInContext
if (timestampPacketDependencies) {
eventsRequest.fillCsrDependencies(dispatchFlags.csrDependencies, commandStreamReceiver, CsrDependencies::DependenciesType::OutOfCsr);

View File

@@ -155,7 +155,8 @@ void Kernel::patchWithImplicitSurface(void *ptrToPatchInCrossThreadData, Graphic
auto surfaceState = ptrOffset(ssh, arg.bindful);
void *addressToPatch = reinterpret_cast<void *>(allocation.getGpuAddressToPatch());
size_t sizeToPatch = allocation.getUnderlyingBufferSize();
Buffer::setSurfaceState(&device, surfaceState, false, false, sizeToPatch, addressToPatch, 0, &allocation, 0, 0);
Buffer::setSurfaceState(&device, surfaceState, false, false, sizeToPatch, addressToPatch, 0, &allocation, 0, 0,
getDefaultKernelInfo().kernelDescriptor.kernelAttributes.flags.useGlobalAtomics, getTotalNumDevicesInContext());
}
}
@@ -182,7 +183,8 @@ void Kernel::patchWithImplicitSurface(void *ptrToPatchInCrossThreadData, Graphic
auto surfaceState = ptrOffset(ssh, sshOffset);
void *addressToPatch = reinterpret_cast<void *>(allocation.getGpuAddressToPatch());
size_t sizeToPatch = allocation.getUnderlyingBufferSize();
Buffer::setSurfaceState(&device, surfaceState, false, false, sizeToPatch, addressToPatch, 0, &allocation, 0, 0);
Buffer::setSurfaceState(&device, surfaceState, false, false, sizeToPatch, addressToPatch, 0, &allocation, 0, 0,
getDefaultKernelInfo().kernelDescriptor.kernelAttributes.flags.useGlobalAtomics, getTotalNumDevicesInContext());
}
}
@@ -358,16 +360,18 @@ cl_int Kernel::initialize() {
patchWithImplicitSurface(reinterpret_cast<void *>(globalMemory), *program->getGlobalSurface(rootDeviceIndex), pClDevice->getDevice(), arg);
}
bool useGlobalAtomics = getDefaultKernelInfo().kernelDescriptor.kernelAttributes.flags.useGlobalAtomics;
if (isValidOffset(kernelDescriptor.payloadMappings.implicitArgs.deviceSideEnqueueEventPoolSurfaceAddress.bindful)) {
auto surfaceState = ptrOffset(reinterpret_cast<uintptr_t *>(getSurfaceStateHeap(rootDeviceIndex)),
kernelDescriptor.payloadMappings.implicitArgs.deviceSideEnqueueEventPoolSurfaceAddress.bindful);
Buffer::setSurfaceState(&pClDevice->getDevice(), surfaceState, false, false, 0, nullptr, 0, nullptr, 0, 0);
Buffer::setSurfaceState(&pClDevice->getDevice(), surfaceState, false, false, 0, nullptr, 0, nullptr, 0, 0, useGlobalAtomics, getTotalNumDevicesInContext());
}
if (isValidOffset(kernelDescriptor.payloadMappings.implicitArgs.deviceSideEnqueueDefaultQueueSurfaceAddress.bindful)) {
auto surfaceState = ptrOffset(reinterpret_cast<uintptr_t *>(getSurfaceStateHeap(rootDeviceIndex)),
kernelDescriptor.payloadMappings.implicitArgs.deviceSideEnqueueDefaultQueueSurfaceAddress.bindful);
Buffer::setSurfaceState(&pClDevice->getDevice(), surfaceState, false, false, 0, nullptr, 0, nullptr, 0, 0);
Buffer::setSurfaceState(&pClDevice->getDevice(), surfaceState, false, false, 0, nullptr, 0, nullptr, 0, 0, useGlobalAtomics, getTotalNumDevicesInContext());
}
setThreadArbitrationPolicy(hwHelper.getDefaultThreadArbitrationPolicy());
@@ -956,7 +960,8 @@ cl_int Kernel::setArgSvm(uint32_t argIndex, size_t svmAllocSize, void *svmPtr, G
if (requiresSshForBuffers(rootDeviceIndex)) {
const auto &kernelArgInfo = kernelInfo.kernelArgInfo[argIndex];
auto surfaceState = ptrOffset(getSurfaceStateHeap(rootDeviceIndex), kernelArgInfo.offsetHeap);
Buffer::setSurfaceState(&getDevice().getDevice(), surfaceState, false, false, svmAllocSize + ptrDiff(svmPtr, ptrToPatch), ptrToPatch, 0, svmAlloc, svmFlags, 0);
Buffer::setSurfaceState(&getDevice().getDevice(), surfaceState, false, false, svmAllocSize + ptrDiff(svmPtr, ptrToPatch), ptrToPatch, 0, svmAlloc, svmFlags, 0,
getDefaultKernelInfo().kernelDescriptor.kernelAttributes.flags.useGlobalAtomics, getTotalNumDevicesInContext());
}
if (!kernelArguments[argIndex].isPatched) {
patchedArgumentsNum++;
@@ -1010,7 +1015,8 @@ cl_int Kernel::setArgSvmAlloc(uint32_t argIndex, void *svmPtr, GraphicsAllocatio
offset = ptrDiff(ptrToPatch, svmAlloc->getGpuAddressToPatch());
allocSize -= offset;
}
Buffer::setSurfaceState(&getDevice().getDevice(), surfaceState, forceNonAuxMode, disableL3, allocSize, ptrToPatch, offset, svmAlloc, 0, 0);
Buffer::setSurfaceState(&getDevice().getDevice(), surfaceState, forceNonAuxMode, disableL3, allocSize, ptrToPatch, offset, svmAlloc, 0, 0,
getDefaultKernelInfo().kernelDescriptor.kernelAttributes.flags.useGlobalAtomics, getTotalNumDevicesInContext());
}
if (!kernelArguments[argIndex].isPatched) {
@@ -1508,10 +1514,8 @@ cl_int Kernel::setArgBuffer(uint32_t argIndex,
if (requiresSshForBuffers(rootDeviceIndex)) {
auto surfaceState = ptrOffset(getSurfaceStateHeap(rootDeviceIndex), kernelArgInfo.offsetHeap);
auto context = program->getContextPtr();
size_t numDevicesInContext = context ? context->getNumDevices() : 1u;
buffer->setArgStateful(surfaceState, forceNonAuxMode, disableL3, isAuxTranslationKernel, kernelArgInfo.isReadOnly, pClDevice->getDevice(),
getDefaultKernelInfo().kernelDescriptor.kernelAttributes.flags.useGlobalAtomics, numDevicesInContext);
getDefaultKernelInfo().kernelDescriptor.kernelAttributes.flags.useGlobalAtomics, getTotalNumDevicesInContext());
}
kernelArguments[argIndex].isStatelessUncacheable = kernelArgInfo.pureStatefulBufferAccess ? false : buffer->isMemObjUncacheable();
@@ -1543,7 +1547,8 @@ cl_int Kernel::setArgBuffer(uint32_t argIndex,
if (requiresSshForBuffers(rootDeviceIndex)) {
auto surfaceState = ptrOffset(getSurfaceStateHeap(rootDeviceIndex), kernelArgInfo.offsetHeap);
Buffer::setSurfaceState(&pClDevice->getDevice(), surfaceState, false, false, 0, nullptr, 0, nullptr, 0, 0);
Buffer::setSurfaceState(&pClDevice->getDevice(), surfaceState, false, false, 0, nullptr, 0, nullptr, 0, 0,
getDefaultKernelInfo().kernelDescriptor.kernelAttributes.flags.useGlobalAtomics, getTotalNumDevicesInContext());
}
isArgSet.set(rootDeviceIndex);
}
@@ -1596,7 +1601,8 @@ cl_int Kernel::setArgPipe(uint32_t argIndex,
auto surfaceState = ptrOffset(getSurfaceStateHeap(rootDeviceIndex), kernelArgInfo.offsetHeap);
Buffer::setSurfaceState(&getDevice().getDevice(), surfaceState, false, false,
pipe->getSize(), pipe->getCpuAddress(), 0,
graphicsAllocation, 0, 0);
graphicsAllocation, 0, 0,
getDefaultKernelInfo().kernelDescriptor.kernelAttributes.flags.useGlobalAtomics, getTotalNumDevicesInContext());
}
return CL_SUCCESS;
@@ -2457,7 +2463,8 @@ void Kernel::patchDefaultDeviceQueue(DeviceQueue *devQueue) {
if (isValidOffset(defaultQueueSurfaceAddress.bindful)) {
auto surfaceState = ptrOffset(reinterpret_cast<uintptr_t *>(getSurfaceStateHeap(rootDeviceIndex)), defaultQueueSurfaceAddress.bindful);
Buffer::setSurfaceState(&devQueue->getDevice(), surfaceState, false, false, devQueue->getQueueBuffer()->getUnderlyingBufferSize(),
(void *)devQueue->getQueueBuffer()->getGpuAddress(), 0, devQueue->getQueueBuffer(), 0, 0);
(void *)devQueue->getQueueBuffer()->getGpuAddress(), 0, devQueue->getQueueBuffer(), 0, 0,
getDefaultKernelInfo().kernelDescriptor.kernelAttributes.flags.useGlobalAtomics, getTotalNumDevicesInContext());
}
}
@@ -2475,7 +2482,8 @@ void Kernel::patchEventPool(DeviceQueue *devQueue) {
auto surfaceState = ptrOffset(reinterpret_cast<uintptr_t *>(getSurfaceStateHeap(rootDeviceIndex)), eventPoolSurfaceAddress.bindful);
auto eventPoolBuffer = devQueue->getEventPoolBuffer();
Buffer::setSurfaceState(&devQueue->getDevice(), surfaceState, false, false, eventPoolBuffer->getUnderlyingBufferSize(),
(void *)eventPoolBuffer->getGpuAddress(), 0, eventPoolBuffer, 0, 0);
(void *)eventPoolBuffer->getGpuAddress(), 0, eventPoolBuffer, 0, 0,
getDefaultKernelInfo().kernelDescriptor.kernelAttributes.flags.useGlobalAtomics, getTotalNumDevicesInContext());
}
}
@@ -2507,7 +2515,8 @@ void Kernel::patchSyncBuffer(Device &device, GraphicsAllocation *gfxAllocation,
auto surfaceState = ptrOffset(reinterpret_cast<uintptr_t *>(getSurfaceStateHeap(rootDeviceIndex)), syncBuffer.bindful);
auto addressToPatch = gfxAllocation->getUnderlyingBuffer();
auto sizeToPatch = gfxAllocation->getUnderlyingBufferSize();
Buffer::setSurfaceState(&device, surfaceState, false, false, sizeToPatch, addressToPatch, 0, gfxAllocation, 0, 0);
Buffer::setSurfaceState(&device, surfaceState, false, false, sizeToPatch, addressToPatch, 0, gfxAllocation, 0, 0,
getDefaultKernelInfo().kernelDescriptor.kernelAttributes.flags.useGlobalAtomics, getTotalNumDevicesInContext());
}
}
@@ -2821,7 +2830,14 @@ void Kernel::setWorkDim(uint32_t rootDeviceIndex, uint32_t workDim) {
uint32_t Kernel::getMaxKernelWorkGroupSize(uint32_t rootDeviceIndex) const {
return kernelDeviceInfos[rootDeviceIndex].maxKernelWorkGroupSize;
}
uint32_t Kernel::getSlmTotalSize(uint32_t rootDeviceIndex) const {
return kernelDeviceInfos[rootDeviceIndex].slmTotalSize;
}
size_t Kernel::getTotalNumDevicesInContext() const {
auto context = program->getContextPtr();
return context ? context->getTotalNumDevices() : 1u;
}
} // namespace NEO

View File

@@ -408,6 +408,8 @@ class Kernel : public ReferenceTrackedObject<Kernel> {
MultiDeviceKernel *getMultiDeviceKernel() const { return pMultiDeviceKernel; }
void setMultiDeviceKernel(MultiDeviceKernel *pMultiDeviceKernelToSet) { pMultiDeviceKernel = pMultiDeviceKernelToSet; }
size_t getTotalNumDevicesInContext() const;
protected:
struct ObjectCounts {
uint32_t imageCount;

View File

@@ -762,13 +762,15 @@ void Buffer::setSurfaceState(const Device *device,
size_t offset,
GraphicsAllocation *gfxAlloc,
cl_mem_flags flags,
cl_mem_flags_intel flagsIntel) {
cl_mem_flags_intel flagsIntel,
bool useGlobalAtomics,
size_t numAvailableDevices) {
auto multiGraphicsAllocation = MultiGraphicsAllocation(device->getRootDeviceIndex());
if (gfxAlloc) {
multiGraphicsAllocation.addAllocation(gfxAlloc);
}
auto buffer = Buffer::createBufferHwFromDevice(device, flags, flagsIntel, svmSize, svmPtr, svmPtr, std::move(multiGraphicsAllocation), offset, true, false, false);
buffer->setArgStateful(surfaceState, forceNonAuxMode, disableL3, false, false, *device, false, 1u);
buffer->setArgStateful(surfaceState, forceNonAuxMode, disableL3, false, false, *device, useGlobalAtomics, numAvailableDevices);
delete buffer;
}

View File

@@ -127,7 +127,9 @@ class Buffer : public MemObj {
size_t offset,
GraphicsAllocation *gfxAlloc,
cl_mem_flags flags,
cl_mem_flags_intel flagsIntel);
cl_mem_flags_intel flagsIntel,
bool useGlobalAtomics,
size_t numDevicesInContext);
static void provideCompressionHint(GraphicsAllocation::AllocationType allocationType,
Context *context,

View File

@@ -15,6 +15,7 @@
#include "shared/source/program/print_formatter.h"
#include "opencl/source/cl_device/cl_device.h"
#include "opencl/source/context/context.h"
#include "opencl/source/helpers/dispatch_info.h"
#include "opencl/source/kernel/kernel.h"
#include "opencl/source/mem_obj/buffer.h"
@@ -64,7 +65,9 @@ void PrintfHandler::prepareDispatch(const MultiDispatchInfo &multiDispatchInfo)
auto surfaceState = ptrOffset(reinterpret_cast<uintptr_t *>(kernel->getSurfaceStateHeap(rootDeviceIndex)), printfSurfaceArg.bindful);
void *addressToPatch = printfSurface->getUnderlyingBuffer();
size_t sizeToPatch = printfSurface->getUnderlyingBufferSize();
Buffer::setSurfaceState(&device.getDevice(), surfaceState, false, false, sizeToPatch, addressToPatch, 0, printfSurface, 0, 0);
Buffer::setSurfaceState(&device.getDevice(), surfaceState, false, false, sizeToPatch, addressToPatch, 0, printfSurface, 0, 0,
kernel->getDefaultKernelInfo().kernelDescriptor.kernelAttributes.flags.useGlobalAtomics,
kernel->getTotalNumDevicesInContext());
}
}

View File

@@ -1349,10 +1349,13 @@ HWTEST_F(EnqueueKernelTest, givenContextWithSeveralDevicesWhenEnqueueKernelThenD
clEnqueueNDRangeKernel(this->pCmdQ, mockKernel.mockMultiDeviceKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr);
EXPECT_EQ(1u, mockCsr->passedDispatchFlags.numDevicesInContext);
context->devices.resize(10);
MockDevice subDevice;
context->devices.push_back(pClDevice);
context->devices.push_back(pClDevice);
clEnqueueNDRangeKernel(this->pCmdQ, mockKernel.mockMultiDeviceKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr);
EXPECT_EQ(10u, mockCsr->passedDispatchFlags.numDevicesInContext);
context->devices.resize(1);
EXPECT_EQ(3u, mockCsr->passedDispatchFlags.numDevicesInContext);
context->devices.pop_back();
context->devices.pop_back();
}
HWTEST_F(EnqueueKernelTest, givenNonVMEKernelWhenEnqueueKernelThenDispatchFlagsDoesntHaveMediaSamplerRequired) {
@@ -1708,4 +1711,4 @@ HWTEST_F(PauseOnGpuTests, givenGpuScratchWriteEnabledWhenEstimatingCommandStream
auto extendedCommandStreamSize = EnqueueOperation<FamilyType>::getTotalSizeRequiredCS(CL_COMMAND_NDRANGE_KERNEL, {}, false, false, false, *pCmdQ, multiDispatchInfo);
EXPECT_EQ(baseCommandStreamSize + sizeof(typename FamilyType::MI_LOAD_REGISTER_IMM), extendedCommandStreamSize);
}
}

View File

@@ -87,6 +87,7 @@ HWTEST_F(CommandStreamReceiverTest, WhenCreatingCsrThenFlagsAreSetCorrectly) {
EXPECT_EQ(-1, csr.lastMediaSamplerConfig);
EXPECT_EQ(PreemptionMode::Initial, csr.lastPreemptionMode);
EXPECT_EQ(0u, csr.latestSentStatelessMocsConfig);
EXPECT_FALSE(csr.lastSentUseGlobalAtomics);
}
TEST_F(CommandStreamReceiverTest, WhenMakingResidentThenBufferResidencyFlagIsSet) {

View File

@@ -132,6 +132,7 @@ struct UltCommandStreamReceiverTest
commandStreamReceiver.lastSentThreadArbitrationPolicy = commandStreamReceiver.requiredThreadArbitrationPolicy;
commandStreamReceiver.lastSentCoherencyRequest = 0;
commandStreamReceiver.lastMediaSamplerConfig = 0;
commandStreamReceiver.lastSentUseGlobalAtomics = false;
}
template <typename GfxFamily>

View File

@@ -271,7 +271,7 @@ HWTEST_F(KernelArgSvmTest, WhenPatchingWithImplicitSurfaceThenPatchIsApplied) {
void *addressToPatch = svmAlloc.getUnderlyingBuffer();
size_t sizeToPatch = svmAlloc.getUnderlyingBufferSize();
Buffer::setSurfaceState(pDevice, &expectedSurfaceState, false, false,
sizeToPatch, addressToPatch, 0, &svmAlloc, 0, 0);
sizeToPatch, addressToPatch, 0, &svmAlloc, 0, 0, false, 1u);
}
// verify ssh was properly patched
@@ -427,7 +427,7 @@ HWTEST_TYPED_TEST(KernelArgSvmTestTyped, GivenBufferKernelArgWhenBufferOffsetIsN
}
Buffer::setSurfaceState(device.get(), &expectedSurfaceState, false, false, svmAlloc.getUnderlyingBufferSize(),
svmAlloc.getUnderlyingBuffer(), 0, &svmAlloc, 0, 0);
svmAlloc.getUnderlyingBuffer(), 0, &svmAlloc, 0, 0, false, 1u);
// verify ssh was properly patched
int32_t cmpResult = memcmp(&expectedSurfaceState, surfState, rendSurfSize);

View File

@@ -81,6 +81,7 @@ class UltCommandStreamReceiver : public CommandStreamReceiverHw<GfxFamily>, publ
using BaseClass::CommandStreamReceiver::lastSentCoherencyRequest;
using BaseClass::CommandStreamReceiver::lastSentL3Config;
using BaseClass::CommandStreamReceiver::lastSentThreadArbitrationPolicy;
using BaseClass::CommandStreamReceiver::lastSentUseGlobalAtomics;
using BaseClass::CommandStreamReceiver::lastVmeSubslicesConfig;
using BaseClass::CommandStreamReceiver::latestFlushedTaskCount;
using BaseClass::CommandStreamReceiver::latestSentStatelessMocsConfig;

View File

@@ -1307,7 +1307,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, BufferSetSurfaceTests, givenBufferSetSurfaceThatMemo
using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE;
RENDER_SURFACE_STATE surfaceState = {};
Buffer::setSurfaceState(device.get(), &surfaceState, false, false, size, ptr, 0, nullptr, 0, 0);
Buffer::setSurfaceState(device.get(), &surfaceState, false, false, size, ptr, 0, nullptr, 0, 0, false, 1u);
auto mocs = surfaceState.getMemoryObjectControlState();
auto gmmHelper = device->getGmmHelper();
@@ -1326,7 +1326,7 @@ HWTEST_F(BufferSetSurfaceTests, givenDebugVariableToDisableCachingForStatefulBuf
using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE;
RENDER_SURFACE_STATE surfaceState = {};
Buffer::setSurfaceState(device.get(), &surfaceState, false, false, size, ptr, 0, nullptr, 0, 0);
Buffer::setSurfaceState(device.get(), &surfaceState, false, false, size, ptr, 0, nullptr, 0, 0, false, 1u);
auto mocs = surfaceState.getMemoryObjectControlState();
auto gmmHelper = device->getGmmHelper();
@@ -1346,7 +1346,7 @@ HWTEST_F(BufferSetSurfaceTests, givenBufferSetSurfaceThatMemoryPtrIsUnalignedToC
using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE;
RENDER_SURFACE_STATE surfaceState = {};
Buffer::setSurfaceState(device.get(), &surfaceState, false, false, size, offsetedPtr, 0, nullptr, 0, 0);
Buffer::setSurfaceState(device.get(), &surfaceState, false, false, size, offsetedPtr, 0, nullptr, 0, 0, false, 1u);
auto mocs = surfaceState.getMemoryObjectControlState();
auto gmmHelper = device->getGmmHelper();
@@ -1365,7 +1365,7 @@ HWTEST_F(BufferSetSurfaceTests, givenBufferSetSurfaceThatMemorySizeIsUnalignedTo
using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE;
RENDER_SURFACE_STATE surfaceState = {};
Buffer::setSurfaceState(device.get(), &surfaceState, false, false, offsetedSize, ptr, 0, nullptr, 0, 0);
Buffer::setSurfaceState(device.get(), &surfaceState, false, false, offsetedSize, ptr, 0, nullptr, 0, 0, false, 1u);
auto mocs = surfaceState.getMemoryObjectControlState();
auto gmmHelper = device->getGmmHelper();
@@ -1384,7 +1384,7 @@ HWTEST_F(BufferSetSurfaceTests, givenBufferSetSurfaceThatMemoryIsUnalignedToCach
using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE;
RENDER_SURFACE_STATE surfaceState = {};
Buffer::setSurfaceState(device.get(), &surfaceState, false, false, offsetedSize, ptr, 0, nullptr, CL_MEM_READ_ONLY, 0);
Buffer::setSurfaceState(device.get(), &surfaceState, false, false, offsetedSize, ptr, 0, nullptr, CL_MEM_READ_ONLY, 0, false, 1u);
auto mocs = surfaceState.getMemoryObjectControlState();
auto gmmHelper = device->getGmmHelper();
@@ -1403,7 +1403,7 @@ HWTEST_F(BufferSetSurfaceTests, givenBufferSetSurfaceThatMemorySizeIsUnalignedTh
using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE;
RENDER_SURFACE_STATE surfaceState = {};
Buffer::setSurfaceState(device.get(), &surfaceState, false, false, offsetedSize, ptr, 0, nullptr, 0, 0);
Buffer::setSurfaceState(device.get(), &surfaceState, false, false, offsetedSize, ptr, 0, nullptr, 0, 0, false, 1u);
auto width = surfaceState.getWidth();
EXPECT_EQ(alignUp(width, 4), width);
@@ -1421,7 +1421,7 @@ HWTEST_F(BufferSetSurfaceTests, givenBufferSetSurfaceWhenOffsetIsSpecifiedForSvm
using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE;
RENDER_SURFACE_STATE surfaceState = {};
Buffer::setSurfaceState(device.get(), &surfaceState, false, false, size, ptr, offset, &svmAlloc, 0, 0);
Buffer::setSurfaceState(device.get(), &surfaceState, false, false, size, ptr, offset, &svmAlloc, 0, 0, false, 1u);
auto baseAddress = surfaceState.getSurfaceBaseAddress();
EXPECT_EQ(svmAlloc.getGpuAddress() + offset, baseAddress);
@@ -1437,7 +1437,7 @@ HWTEST_F(BufferSetSurfaceTests, givenBufferSetSurfaceThatMemoryPtrIsNotNullThenB
using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE;
RENDER_SURFACE_STATE surfaceState = {};
Buffer::setSurfaceState(device.get(), &surfaceState, false, false, size, ptr, 0, nullptr, 0, 0);
Buffer::setSurfaceState(device.get(), &surfaceState, false, false, size, ptr, 0, nullptr, 0, 0, false, 1u);
auto surfType = surfaceState.getSurfaceType();
EXPECT_EQ(RENDER_SURFACE_STATE::SURFACE_TYPE_SURFTYPE_BUFFER, surfType);
@@ -1450,7 +1450,7 @@ HWTEST_F(BufferSetSurfaceTests, givenBufferSetSurfaceThatMemoryPtrIsNullThenNull
using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE;
RENDER_SURFACE_STATE surfaceState = {};
Buffer::setSurfaceState(device.get(), &surfaceState, false, false, 0, nullptr, 0, nullptr, 0, 0);
Buffer::setSurfaceState(device.get(), &surfaceState, false, false, 0, nullptr, 0, nullptr, 0, 0, false, 1u);
auto surfType = surfaceState.getSurfaceType();
EXPECT_EQ(RENDER_SURFACE_STATE::SURFACE_TYPE_SURFTYPE_NULL, surfType);
@@ -1684,7 +1684,7 @@ HWTEST_F(BufferSetSurfaceTests, givenMisalignedPointerWhenSurfaceStateIsProgramm
uintptr_t ptr = 0xfffff000;
void *svmPtr = reinterpret_cast<void *>(ptr);
Buffer::setSurfaceState(device.get(), &surfaceState, false, false, 5, svmPtr, 0, nullptr, 0, 0);
Buffer::setSurfaceState(device.get(), &surfaceState, false, false, 5, svmPtr, 0, nullptr, 0, 0, false, 1u);
EXPECT_EQ(castToUint64(svmPtr), surfaceState.getSurfaceBaseAddress());
SURFACE_STATE_BUFFER_LENGTH length = {};
@@ -1701,7 +1701,7 @@ HWTEST_F(BufferSetSurfaceTests, givenBufferThatIsMisalignedWhenSurfaceStateIsBei
MockContext context;
void *svmPtr = reinterpret_cast<void *>(0x1005);
Buffer::setSurfaceState(device.get(), &surfaceState, false, false, 5, svmPtr, 0, nullptr, 0, 0);
Buffer::setSurfaceState(device.get(), &surfaceState, false, false, 5, svmPtr, 0, nullptr, 0, 0, false, 1u);
EXPECT_EQ(0u, surfaceState.getMemoryObjectControlState());
}

View File

@@ -59,7 +59,7 @@ class MockBuffer : public MockBufferStorage, public Buffer {
}
}
void setArgStateful(void *memory, bool forceNonAuxMode, bool disableL3, bool alignSizeForAuxTranslation, bool isReadOnly, const Device &device, bool useGlobalAtomics, size_t numDevicesInContext) override {
Buffer::setSurfaceState(this->device.get(), memory, forceNonAuxMode, disableL3, getSize(), getCpuAddress(), 0, (externalAlloc != nullptr) ? externalAlloc : &mockGfxAllocation, 0, 0);
Buffer::setSurfaceState(this->device.get(), memory, forceNonAuxMode, disableL3, getSize(), getCpuAddress(), 0, (externalAlloc != nullptr) ? externalAlloc : &mockGfxAllocation, 0, 0, false, 1u);
}
GraphicsAllocation *externalAlloc = nullptr;
};
@@ -80,7 +80,7 @@ class AlignedBuffer : public MockBufferStorage, public Buffer {
GraphicsAllocationHelper::toMultiGraphicsAllocation(gfxAllocation), true, false, false) {
}
void setArgStateful(void *memory, bool forceNonAuxMode, bool disableL3, bool alignSizeForAuxTranslation, bool isReadOnly, const Device &device, bool useGlobalAtomics, size_t numDevicesInContext) override {
Buffer::setSurfaceState(this->device.get(), memory, forceNonAuxMode, disableL3, getSize(), getCpuAddress(), 0, &mockGfxAllocation, 0, 0);
Buffer::setSurfaceState(this->device.get(), memory, forceNonAuxMode, disableL3, getSize(), getCpuAddress(), 0, &mockGfxAllocation, 0, 0, false, 1u);
}
};
@@ -100,7 +100,7 @@ class UnalignedBuffer : public MockBufferStorage, public Buffer {
GraphicsAllocationHelper::toMultiGraphicsAllocation(gfxAllocation), false, false, false) {
}
void setArgStateful(void *memory, bool forceNonAuxMode, bool disableL3, bool alignSizeForAuxTranslation, bool isReadOnly, const Device &device, bool useGlobalAtomics, size_t numDevicesInContext) override {
Buffer::setSurfaceState(this->device.get(), memory, forceNonAuxMode, disableL3, getSize(), getCpuAddress(), 0, &mockGfxAllocation, 0, 0);
Buffer::setSurfaceState(this->device.get(), memory, forceNonAuxMode, disableL3, getSize(), getCpuAddress(), 0, &mockGfxAllocation, 0, 0, false, 1u);
}
};

View File

@@ -332,6 +332,7 @@ void CommandStreamReceiver::initProgrammingFlags() {
lastMediaSamplerConfig = -1;
lastPreemptionMode = PreemptionMode::Initial;
latestSentStatelessMocsConfig = 0;
lastSentUseGlobalAtomics = false;
}
void CommandStreamReceiver::programForAubSubCapture(bool wasActiveInPreviousEnqueue, bool isActive) {

View File

@@ -345,6 +345,7 @@ class CommandStreamReceiver {
bool useNewResourceImplicitFlush = false;
bool newResources = false;
bool useGpuIdleImplicitFlush = false;
bool lastSentUseGlobalAtomics = false;
};
typedef CommandStreamReceiver *(*CommandStreamReceiverCreateFunc)(bool withAubDump,

View File

@@ -367,6 +367,11 @@ CompletionStamp CommandStreamReceiverHw<GfxFamily>::flushTask(
latestSentStatelessMocsConfig = mocsIndex;
}
if (dispatchFlags.useGlobalAtomics != lastSentUseGlobalAtomics) {
isStateBaseAddressDirty = true;
lastSentUseGlobalAtomics = dispatchFlags.useGlobalAtomics;
}
bool sourceLevelDebuggerActive = device.getSourceLevelDebugger() != nullptr ? true : false;
auto memoryCompressionState = lastMemoryCompressionState;