mirror of
https://github.com/intel/compute-runtime.git
synced 2025-12-19 06:24:51 +08:00
Add extra parameters to Buffer::setSurfaceState() method
Signed-off-by: Igor Venevtsev <igor.venevtsev@intel.com>
This commit is contained in:
committed by
Compute-Runtime-Automation
parent
9114a3e414
commit
52e118fe49
@@ -88,7 +88,7 @@ inline void patchWithImplicitSurface(ArrayRef<uint8_t> crossThreadData, ArrayRef
|
||||
void *addressToPatch = reinterpret_cast<void *>(allocation.getUnderlyingBuffer());
|
||||
size_t sizeToPatch = allocation.getUnderlyingBufferSize();
|
||||
NEO::Buffer::setSurfaceState(&device, surfaceState, false, false, sizeToPatch, addressToPatch, 0,
|
||||
&allocation, 0, 0);
|
||||
&allocation, 0, 0, false, 1u);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -540,7 +540,10 @@ bool CommandQueue::setupDebugSurface(Kernel *kernel) {
|
||||
kernel->getKernelInfo(rootDeviceIndex).kernelDescriptor.payloadMappings.implicitArgs.systemThreadSurfaceAddress.bindful);
|
||||
void *addressToPatch = reinterpret_cast<void *>(debugSurface->getGpuAddress());
|
||||
size_t sizeToPatch = debugSurface->getUnderlyingBufferSize();
|
||||
Buffer::setSurfaceState(&device->getDevice(), surfaceState, false, false, sizeToPatch, addressToPatch, 0, debugSurface, 0, 0);
|
||||
Buffer::setSurfaceState(&device->getDevice(), surfaceState, false, false, sizeToPatch,
|
||||
addressToPatch, 0, debugSurface, 0, 0,
|
||||
kernel->getDefaultKernelInfo().kernelDescriptor.kernelAttributes.flags.useGlobalAtomics,
|
||||
kernel->getTotalNumDevicesInContext());
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
@@ -710,6 +710,7 @@ CompletionStamp CommandQueueHw<GfxFamily>::enqueueNonBlocked(
|
||||
Kernel *kernel = nullptr;
|
||||
bool usePerDssBackedBuffer = false;
|
||||
bool auxTranslationRequired = false;
|
||||
bool useGlobalAtomics = false;
|
||||
|
||||
for (auto &dispatchInfo : multiDispatchInfo) {
|
||||
if (kernel != dispatchInfo.getKernel()) {
|
||||
@@ -731,6 +732,10 @@ CompletionStamp CommandQueueHw<GfxFamily>::enqueueNonBlocked(
|
||||
if (kernel->requiresPerDssBackedBuffer(rootDeviceIndex)) {
|
||||
usePerDssBackedBuffer = true;
|
||||
}
|
||||
|
||||
if (kernel->getDefaultKernelInfo().kernelDescriptor.kernelAttributes.flags.useGlobalAtomics) {
|
||||
useGlobalAtomics = true;
|
||||
}
|
||||
}
|
||||
|
||||
if (mediaSamplerRequired) {
|
||||
@@ -776,9 +781,6 @@ CompletionStamp CommandQueueHw<GfxFamily>::enqueueNonBlocked(
|
||||
|
||||
auto memoryCompressionState = getGpgpuCommandStreamReceiver().getMemoryCompressionState(auxTranslationRequired);
|
||||
|
||||
auto context = kernel->getProgram()->getContextPtr();
|
||||
auto numDevicesInContext = context ? context->getNumDevices() : 1u;
|
||||
|
||||
DispatchFlags dispatchFlags(
|
||||
{}, //csrDependencies
|
||||
&timestampPacketDependencies.barrierNodes, //barrierTimestampPacketNodes
|
||||
@@ -805,8 +807,8 @@ CompletionStamp CommandQueueHw<GfxFamily>::enqueueNonBlocked(
|
||||
false, //epilogueRequired
|
||||
usePerDssBackedBuffer, //usePerDssBackedBuffer
|
||||
kernel->isSingleSubdevicePreferred(), //useSingleSubdevice
|
||||
kernel->getDefaultKernelInfo().kernelDescriptor.kernelAttributes.flags.useGlobalAtomics, //useGlobalAtomics
|
||||
numDevicesInContext //numDevicesInContext
|
||||
useGlobalAtomics, //useGlobalAtomics
|
||||
kernel->getTotalNumDevicesInContext() //numDevicesInContext
|
||||
);
|
||||
|
||||
dispatchFlags.pipelineSelectArgs.mediaSamplerRequired = mediaSamplerRequired;
|
||||
|
||||
@@ -101,7 +101,9 @@ void HardwareInterface<GfxFamily>::dispatchWalker(
|
||||
void *addressToPatch = reinterpret_cast<void *>(debugSurface->getGpuAddress());
|
||||
size_t sizeToPatch = debugSurface->getUnderlyingBufferSize();
|
||||
Buffer::setSurfaceState(&commandQueue.getDevice(), commandQueue.getDevice().getDebugger()->getDebugSurfaceReservedSurfaceState(*ssh),
|
||||
false, false, sizeToPatch, addressToPatch, 0, debugSurface, 0, 0);
|
||||
false, false, sizeToPatch, addressToPatch, 0, debugSurface, 0, 0,
|
||||
mainKernel->getDefaultKernelInfo().kernelDescriptor.kernelAttributes.flags.useGlobalAtomics,
|
||||
mainKernel->getTotalNumDevicesInContext());
|
||||
}
|
||||
|
||||
auto numSupportedDevices = commandQueue.getGpgpuCommandStreamReceiver().getOsContext().getNumSupportedDevices();
|
||||
|
||||
@@ -217,9 +217,6 @@ CompletionStamp &CommandComputeKernel::submit(uint32_t taskLevel, bool terminate
|
||||
|
||||
auto memoryCompressionState = commandStreamReceiver.getMemoryCompressionState(kernel->isAuxTranslationRequired());
|
||||
|
||||
auto context = kernel->getProgram()->getContextPtr();
|
||||
auto numDevicesInContext = context ? context->getNumDevices() : 1u;
|
||||
|
||||
DispatchFlags dispatchFlags(
|
||||
{}, //csrDependencies
|
||||
nullptr, //barrierTimestampPacketNodes
|
||||
@@ -247,7 +244,7 @@ CompletionStamp &CommandComputeKernel::submit(uint32_t taskLevel, bool terminate
|
||||
kernel->requiresPerDssBackedBuffer(rootDeviceIndex), //usePerDssBackedBuffer
|
||||
kernel->isSingleSubdevicePreferred(), //useSingleSubdevice
|
||||
kernel->getDefaultKernelInfo().kernelDescriptor.kernelAttributes.flags.useGlobalAtomics, //useGlobalAtomics
|
||||
numDevicesInContext); //numDevicesInContext
|
||||
kernel->getTotalNumDevicesInContext()); //numDevicesInContext
|
||||
|
||||
if (timestampPacketDependencies) {
|
||||
eventsRequest.fillCsrDependencies(dispatchFlags.csrDependencies, commandStreamReceiver, CsrDependencies::DependenciesType::OutOfCsr);
|
||||
|
||||
@@ -155,7 +155,8 @@ void Kernel::patchWithImplicitSurface(void *ptrToPatchInCrossThreadData, Graphic
|
||||
auto surfaceState = ptrOffset(ssh, arg.bindful);
|
||||
void *addressToPatch = reinterpret_cast<void *>(allocation.getGpuAddressToPatch());
|
||||
size_t sizeToPatch = allocation.getUnderlyingBufferSize();
|
||||
Buffer::setSurfaceState(&device, surfaceState, false, false, sizeToPatch, addressToPatch, 0, &allocation, 0, 0);
|
||||
Buffer::setSurfaceState(&device, surfaceState, false, false, sizeToPatch, addressToPatch, 0, &allocation, 0, 0,
|
||||
getDefaultKernelInfo().kernelDescriptor.kernelAttributes.flags.useGlobalAtomics, getTotalNumDevicesInContext());
|
||||
}
|
||||
}
|
||||
|
||||
@@ -182,7 +183,8 @@ void Kernel::patchWithImplicitSurface(void *ptrToPatchInCrossThreadData, Graphic
|
||||
auto surfaceState = ptrOffset(ssh, sshOffset);
|
||||
void *addressToPatch = reinterpret_cast<void *>(allocation.getGpuAddressToPatch());
|
||||
size_t sizeToPatch = allocation.getUnderlyingBufferSize();
|
||||
Buffer::setSurfaceState(&device, surfaceState, false, false, sizeToPatch, addressToPatch, 0, &allocation, 0, 0);
|
||||
Buffer::setSurfaceState(&device, surfaceState, false, false, sizeToPatch, addressToPatch, 0, &allocation, 0, 0,
|
||||
getDefaultKernelInfo().kernelDescriptor.kernelAttributes.flags.useGlobalAtomics, getTotalNumDevicesInContext());
|
||||
}
|
||||
}
|
||||
|
||||
@@ -358,16 +360,18 @@ cl_int Kernel::initialize() {
|
||||
patchWithImplicitSurface(reinterpret_cast<void *>(globalMemory), *program->getGlobalSurface(rootDeviceIndex), pClDevice->getDevice(), arg);
|
||||
}
|
||||
|
||||
bool useGlobalAtomics = getDefaultKernelInfo().kernelDescriptor.kernelAttributes.flags.useGlobalAtomics;
|
||||
|
||||
if (isValidOffset(kernelDescriptor.payloadMappings.implicitArgs.deviceSideEnqueueEventPoolSurfaceAddress.bindful)) {
|
||||
auto surfaceState = ptrOffset(reinterpret_cast<uintptr_t *>(getSurfaceStateHeap(rootDeviceIndex)),
|
||||
kernelDescriptor.payloadMappings.implicitArgs.deviceSideEnqueueEventPoolSurfaceAddress.bindful);
|
||||
Buffer::setSurfaceState(&pClDevice->getDevice(), surfaceState, false, false, 0, nullptr, 0, nullptr, 0, 0);
|
||||
Buffer::setSurfaceState(&pClDevice->getDevice(), surfaceState, false, false, 0, nullptr, 0, nullptr, 0, 0, useGlobalAtomics, getTotalNumDevicesInContext());
|
||||
}
|
||||
|
||||
if (isValidOffset(kernelDescriptor.payloadMappings.implicitArgs.deviceSideEnqueueDefaultQueueSurfaceAddress.bindful)) {
|
||||
auto surfaceState = ptrOffset(reinterpret_cast<uintptr_t *>(getSurfaceStateHeap(rootDeviceIndex)),
|
||||
kernelDescriptor.payloadMappings.implicitArgs.deviceSideEnqueueDefaultQueueSurfaceAddress.bindful);
|
||||
Buffer::setSurfaceState(&pClDevice->getDevice(), surfaceState, false, false, 0, nullptr, 0, nullptr, 0, 0);
|
||||
Buffer::setSurfaceState(&pClDevice->getDevice(), surfaceState, false, false, 0, nullptr, 0, nullptr, 0, 0, useGlobalAtomics, getTotalNumDevicesInContext());
|
||||
}
|
||||
|
||||
setThreadArbitrationPolicy(hwHelper.getDefaultThreadArbitrationPolicy());
|
||||
@@ -956,7 +960,8 @@ cl_int Kernel::setArgSvm(uint32_t argIndex, size_t svmAllocSize, void *svmPtr, G
|
||||
if (requiresSshForBuffers(rootDeviceIndex)) {
|
||||
const auto &kernelArgInfo = kernelInfo.kernelArgInfo[argIndex];
|
||||
auto surfaceState = ptrOffset(getSurfaceStateHeap(rootDeviceIndex), kernelArgInfo.offsetHeap);
|
||||
Buffer::setSurfaceState(&getDevice().getDevice(), surfaceState, false, false, svmAllocSize + ptrDiff(svmPtr, ptrToPatch), ptrToPatch, 0, svmAlloc, svmFlags, 0);
|
||||
Buffer::setSurfaceState(&getDevice().getDevice(), surfaceState, false, false, svmAllocSize + ptrDiff(svmPtr, ptrToPatch), ptrToPatch, 0, svmAlloc, svmFlags, 0,
|
||||
getDefaultKernelInfo().kernelDescriptor.kernelAttributes.flags.useGlobalAtomics, getTotalNumDevicesInContext());
|
||||
}
|
||||
if (!kernelArguments[argIndex].isPatched) {
|
||||
patchedArgumentsNum++;
|
||||
@@ -1010,7 +1015,8 @@ cl_int Kernel::setArgSvmAlloc(uint32_t argIndex, void *svmPtr, GraphicsAllocatio
|
||||
offset = ptrDiff(ptrToPatch, svmAlloc->getGpuAddressToPatch());
|
||||
allocSize -= offset;
|
||||
}
|
||||
Buffer::setSurfaceState(&getDevice().getDevice(), surfaceState, forceNonAuxMode, disableL3, allocSize, ptrToPatch, offset, svmAlloc, 0, 0);
|
||||
Buffer::setSurfaceState(&getDevice().getDevice(), surfaceState, forceNonAuxMode, disableL3, allocSize, ptrToPatch, offset, svmAlloc, 0, 0,
|
||||
getDefaultKernelInfo().kernelDescriptor.kernelAttributes.flags.useGlobalAtomics, getTotalNumDevicesInContext());
|
||||
}
|
||||
|
||||
if (!kernelArguments[argIndex].isPatched) {
|
||||
@@ -1508,10 +1514,8 @@ cl_int Kernel::setArgBuffer(uint32_t argIndex,
|
||||
|
||||
if (requiresSshForBuffers(rootDeviceIndex)) {
|
||||
auto surfaceState = ptrOffset(getSurfaceStateHeap(rootDeviceIndex), kernelArgInfo.offsetHeap);
|
||||
auto context = program->getContextPtr();
|
||||
size_t numDevicesInContext = context ? context->getNumDevices() : 1u;
|
||||
buffer->setArgStateful(surfaceState, forceNonAuxMode, disableL3, isAuxTranslationKernel, kernelArgInfo.isReadOnly, pClDevice->getDevice(),
|
||||
getDefaultKernelInfo().kernelDescriptor.kernelAttributes.flags.useGlobalAtomics, numDevicesInContext);
|
||||
getDefaultKernelInfo().kernelDescriptor.kernelAttributes.flags.useGlobalAtomics, getTotalNumDevicesInContext());
|
||||
}
|
||||
|
||||
kernelArguments[argIndex].isStatelessUncacheable = kernelArgInfo.pureStatefulBufferAccess ? false : buffer->isMemObjUncacheable();
|
||||
@@ -1543,7 +1547,8 @@ cl_int Kernel::setArgBuffer(uint32_t argIndex,
|
||||
|
||||
if (requiresSshForBuffers(rootDeviceIndex)) {
|
||||
auto surfaceState = ptrOffset(getSurfaceStateHeap(rootDeviceIndex), kernelArgInfo.offsetHeap);
|
||||
Buffer::setSurfaceState(&pClDevice->getDevice(), surfaceState, false, false, 0, nullptr, 0, nullptr, 0, 0);
|
||||
Buffer::setSurfaceState(&pClDevice->getDevice(), surfaceState, false, false, 0, nullptr, 0, nullptr, 0, 0,
|
||||
getDefaultKernelInfo().kernelDescriptor.kernelAttributes.flags.useGlobalAtomics, getTotalNumDevicesInContext());
|
||||
}
|
||||
isArgSet.set(rootDeviceIndex);
|
||||
}
|
||||
@@ -1596,7 +1601,8 @@ cl_int Kernel::setArgPipe(uint32_t argIndex,
|
||||
auto surfaceState = ptrOffset(getSurfaceStateHeap(rootDeviceIndex), kernelArgInfo.offsetHeap);
|
||||
Buffer::setSurfaceState(&getDevice().getDevice(), surfaceState, false, false,
|
||||
pipe->getSize(), pipe->getCpuAddress(), 0,
|
||||
graphicsAllocation, 0, 0);
|
||||
graphicsAllocation, 0, 0,
|
||||
getDefaultKernelInfo().kernelDescriptor.kernelAttributes.flags.useGlobalAtomics, getTotalNumDevicesInContext());
|
||||
}
|
||||
|
||||
return CL_SUCCESS;
|
||||
@@ -2457,7 +2463,8 @@ void Kernel::patchDefaultDeviceQueue(DeviceQueue *devQueue) {
|
||||
if (isValidOffset(defaultQueueSurfaceAddress.bindful)) {
|
||||
auto surfaceState = ptrOffset(reinterpret_cast<uintptr_t *>(getSurfaceStateHeap(rootDeviceIndex)), defaultQueueSurfaceAddress.bindful);
|
||||
Buffer::setSurfaceState(&devQueue->getDevice(), surfaceState, false, false, devQueue->getQueueBuffer()->getUnderlyingBufferSize(),
|
||||
(void *)devQueue->getQueueBuffer()->getGpuAddress(), 0, devQueue->getQueueBuffer(), 0, 0);
|
||||
(void *)devQueue->getQueueBuffer()->getGpuAddress(), 0, devQueue->getQueueBuffer(), 0, 0,
|
||||
getDefaultKernelInfo().kernelDescriptor.kernelAttributes.flags.useGlobalAtomics, getTotalNumDevicesInContext());
|
||||
}
|
||||
}
|
||||
|
||||
@@ -2475,7 +2482,8 @@ void Kernel::patchEventPool(DeviceQueue *devQueue) {
|
||||
auto surfaceState = ptrOffset(reinterpret_cast<uintptr_t *>(getSurfaceStateHeap(rootDeviceIndex)), eventPoolSurfaceAddress.bindful);
|
||||
auto eventPoolBuffer = devQueue->getEventPoolBuffer();
|
||||
Buffer::setSurfaceState(&devQueue->getDevice(), surfaceState, false, false, eventPoolBuffer->getUnderlyingBufferSize(),
|
||||
(void *)eventPoolBuffer->getGpuAddress(), 0, eventPoolBuffer, 0, 0);
|
||||
(void *)eventPoolBuffer->getGpuAddress(), 0, eventPoolBuffer, 0, 0,
|
||||
getDefaultKernelInfo().kernelDescriptor.kernelAttributes.flags.useGlobalAtomics, getTotalNumDevicesInContext());
|
||||
}
|
||||
}
|
||||
|
||||
@@ -2507,7 +2515,8 @@ void Kernel::patchSyncBuffer(Device &device, GraphicsAllocation *gfxAllocation,
|
||||
auto surfaceState = ptrOffset(reinterpret_cast<uintptr_t *>(getSurfaceStateHeap(rootDeviceIndex)), syncBuffer.bindful);
|
||||
auto addressToPatch = gfxAllocation->getUnderlyingBuffer();
|
||||
auto sizeToPatch = gfxAllocation->getUnderlyingBufferSize();
|
||||
Buffer::setSurfaceState(&device, surfaceState, false, false, sizeToPatch, addressToPatch, 0, gfxAllocation, 0, 0);
|
||||
Buffer::setSurfaceState(&device, surfaceState, false, false, sizeToPatch, addressToPatch, 0, gfxAllocation, 0, 0,
|
||||
getDefaultKernelInfo().kernelDescriptor.kernelAttributes.flags.useGlobalAtomics, getTotalNumDevicesInContext());
|
||||
}
|
||||
}
|
||||
|
||||
@@ -2821,7 +2830,14 @@ void Kernel::setWorkDim(uint32_t rootDeviceIndex, uint32_t workDim) {
|
||||
uint32_t Kernel::getMaxKernelWorkGroupSize(uint32_t rootDeviceIndex) const {
|
||||
return kernelDeviceInfos[rootDeviceIndex].maxKernelWorkGroupSize;
|
||||
}
|
||||
|
||||
uint32_t Kernel::getSlmTotalSize(uint32_t rootDeviceIndex) const {
|
||||
return kernelDeviceInfos[rootDeviceIndex].slmTotalSize;
|
||||
}
|
||||
|
||||
size_t Kernel::getTotalNumDevicesInContext() const {
|
||||
auto context = program->getContextPtr();
|
||||
return context ? context->getTotalNumDevices() : 1u;
|
||||
}
|
||||
|
||||
} // namespace NEO
|
||||
|
||||
@@ -408,6 +408,8 @@ class Kernel : public ReferenceTrackedObject<Kernel> {
|
||||
MultiDeviceKernel *getMultiDeviceKernel() const { return pMultiDeviceKernel; }
|
||||
void setMultiDeviceKernel(MultiDeviceKernel *pMultiDeviceKernelToSet) { pMultiDeviceKernel = pMultiDeviceKernelToSet; }
|
||||
|
||||
size_t getTotalNumDevicesInContext() const;
|
||||
|
||||
protected:
|
||||
struct ObjectCounts {
|
||||
uint32_t imageCount;
|
||||
|
||||
@@ -762,13 +762,15 @@ void Buffer::setSurfaceState(const Device *device,
|
||||
size_t offset,
|
||||
GraphicsAllocation *gfxAlloc,
|
||||
cl_mem_flags flags,
|
||||
cl_mem_flags_intel flagsIntel) {
|
||||
cl_mem_flags_intel flagsIntel,
|
||||
bool useGlobalAtomics,
|
||||
size_t numAvailableDevices) {
|
||||
auto multiGraphicsAllocation = MultiGraphicsAllocation(device->getRootDeviceIndex());
|
||||
if (gfxAlloc) {
|
||||
multiGraphicsAllocation.addAllocation(gfxAlloc);
|
||||
}
|
||||
auto buffer = Buffer::createBufferHwFromDevice(device, flags, flagsIntel, svmSize, svmPtr, svmPtr, std::move(multiGraphicsAllocation), offset, true, false, false);
|
||||
buffer->setArgStateful(surfaceState, forceNonAuxMode, disableL3, false, false, *device, false, 1u);
|
||||
buffer->setArgStateful(surfaceState, forceNonAuxMode, disableL3, false, false, *device, useGlobalAtomics, numAvailableDevices);
|
||||
delete buffer;
|
||||
}
|
||||
|
||||
|
||||
@@ -127,7 +127,9 @@ class Buffer : public MemObj {
|
||||
size_t offset,
|
||||
GraphicsAllocation *gfxAlloc,
|
||||
cl_mem_flags flags,
|
||||
cl_mem_flags_intel flagsIntel);
|
||||
cl_mem_flags_intel flagsIntel,
|
||||
bool useGlobalAtomics,
|
||||
size_t numDevicesInContext);
|
||||
|
||||
static void provideCompressionHint(GraphicsAllocation::AllocationType allocationType,
|
||||
Context *context,
|
||||
|
||||
@@ -15,6 +15,7 @@
|
||||
#include "shared/source/program/print_formatter.h"
|
||||
|
||||
#include "opencl/source/cl_device/cl_device.h"
|
||||
#include "opencl/source/context/context.h"
|
||||
#include "opencl/source/helpers/dispatch_info.h"
|
||||
#include "opencl/source/kernel/kernel.h"
|
||||
#include "opencl/source/mem_obj/buffer.h"
|
||||
@@ -64,7 +65,9 @@ void PrintfHandler::prepareDispatch(const MultiDispatchInfo &multiDispatchInfo)
|
||||
auto surfaceState = ptrOffset(reinterpret_cast<uintptr_t *>(kernel->getSurfaceStateHeap(rootDeviceIndex)), printfSurfaceArg.bindful);
|
||||
void *addressToPatch = printfSurface->getUnderlyingBuffer();
|
||||
size_t sizeToPatch = printfSurface->getUnderlyingBufferSize();
|
||||
Buffer::setSurfaceState(&device.getDevice(), surfaceState, false, false, sizeToPatch, addressToPatch, 0, printfSurface, 0, 0);
|
||||
Buffer::setSurfaceState(&device.getDevice(), surfaceState, false, false, sizeToPatch, addressToPatch, 0, printfSurface, 0, 0,
|
||||
kernel->getDefaultKernelInfo().kernelDescriptor.kernelAttributes.flags.useGlobalAtomics,
|
||||
kernel->getTotalNumDevicesInContext());
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -1349,10 +1349,13 @@ HWTEST_F(EnqueueKernelTest, givenContextWithSeveralDevicesWhenEnqueueKernelThenD
|
||||
clEnqueueNDRangeKernel(this->pCmdQ, mockKernel.mockMultiDeviceKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr);
|
||||
EXPECT_EQ(1u, mockCsr->passedDispatchFlags.numDevicesInContext);
|
||||
|
||||
context->devices.resize(10);
|
||||
MockDevice subDevice;
|
||||
context->devices.push_back(pClDevice);
|
||||
context->devices.push_back(pClDevice);
|
||||
clEnqueueNDRangeKernel(this->pCmdQ, mockKernel.mockMultiDeviceKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr);
|
||||
EXPECT_EQ(10u, mockCsr->passedDispatchFlags.numDevicesInContext);
|
||||
context->devices.resize(1);
|
||||
EXPECT_EQ(3u, mockCsr->passedDispatchFlags.numDevicesInContext);
|
||||
context->devices.pop_back();
|
||||
context->devices.pop_back();
|
||||
}
|
||||
|
||||
HWTEST_F(EnqueueKernelTest, givenNonVMEKernelWhenEnqueueKernelThenDispatchFlagsDoesntHaveMediaSamplerRequired) {
|
||||
@@ -1708,4 +1711,4 @@ HWTEST_F(PauseOnGpuTests, givenGpuScratchWriteEnabledWhenEstimatingCommandStream
|
||||
auto extendedCommandStreamSize = EnqueueOperation<FamilyType>::getTotalSizeRequiredCS(CL_COMMAND_NDRANGE_KERNEL, {}, false, false, false, *pCmdQ, multiDispatchInfo);
|
||||
|
||||
EXPECT_EQ(baseCommandStreamSize + sizeof(typename FamilyType::MI_LOAD_REGISTER_IMM), extendedCommandStreamSize);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -87,6 +87,7 @@ HWTEST_F(CommandStreamReceiverTest, WhenCreatingCsrThenFlagsAreSetCorrectly) {
|
||||
EXPECT_EQ(-1, csr.lastMediaSamplerConfig);
|
||||
EXPECT_EQ(PreemptionMode::Initial, csr.lastPreemptionMode);
|
||||
EXPECT_EQ(0u, csr.latestSentStatelessMocsConfig);
|
||||
EXPECT_FALSE(csr.lastSentUseGlobalAtomics);
|
||||
}
|
||||
|
||||
TEST_F(CommandStreamReceiverTest, WhenMakingResidentThenBufferResidencyFlagIsSet) {
|
||||
|
||||
@@ -132,6 +132,7 @@ struct UltCommandStreamReceiverTest
|
||||
commandStreamReceiver.lastSentThreadArbitrationPolicy = commandStreamReceiver.requiredThreadArbitrationPolicy;
|
||||
commandStreamReceiver.lastSentCoherencyRequest = 0;
|
||||
commandStreamReceiver.lastMediaSamplerConfig = 0;
|
||||
commandStreamReceiver.lastSentUseGlobalAtomics = false;
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
|
||||
@@ -271,7 +271,7 @@ HWTEST_F(KernelArgSvmTest, WhenPatchingWithImplicitSurfaceThenPatchIsApplied) {
|
||||
void *addressToPatch = svmAlloc.getUnderlyingBuffer();
|
||||
size_t sizeToPatch = svmAlloc.getUnderlyingBufferSize();
|
||||
Buffer::setSurfaceState(pDevice, &expectedSurfaceState, false, false,
|
||||
sizeToPatch, addressToPatch, 0, &svmAlloc, 0, 0);
|
||||
sizeToPatch, addressToPatch, 0, &svmAlloc, 0, 0, false, 1u);
|
||||
}
|
||||
|
||||
// verify ssh was properly patched
|
||||
@@ -427,7 +427,7 @@ HWTEST_TYPED_TEST(KernelArgSvmTestTyped, GivenBufferKernelArgWhenBufferOffsetIsN
|
||||
}
|
||||
|
||||
Buffer::setSurfaceState(device.get(), &expectedSurfaceState, false, false, svmAlloc.getUnderlyingBufferSize(),
|
||||
svmAlloc.getUnderlyingBuffer(), 0, &svmAlloc, 0, 0);
|
||||
svmAlloc.getUnderlyingBuffer(), 0, &svmAlloc, 0, 0, false, 1u);
|
||||
|
||||
// verify ssh was properly patched
|
||||
int32_t cmpResult = memcmp(&expectedSurfaceState, surfState, rendSurfSize);
|
||||
|
||||
@@ -81,6 +81,7 @@ class UltCommandStreamReceiver : public CommandStreamReceiverHw<GfxFamily>, publ
|
||||
using BaseClass::CommandStreamReceiver::lastSentCoherencyRequest;
|
||||
using BaseClass::CommandStreamReceiver::lastSentL3Config;
|
||||
using BaseClass::CommandStreamReceiver::lastSentThreadArbitrationPolicy;
|
||||
using BaseClass::CommandStreamReceiver::lastSentUseGlobalAtomics;
|
||||
using BaseClass::CommandStreamReceiver::lastVmeSubslicesConfig;
|
||||
using BaseClass::CommandStreamReceiver::latestFlushedTaskCount;
|
||||
using BaseClass::CommandStreamReceiver::latestSentStatelessMocsConfig;
|
||||
|
||||
@@ -1307,7 +1307,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, BufferSetSurfaceTests, givenBufferSetSurfaceThatMemo
|
||||
using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE;
|
||||
RENDER_SURFACE_STATE surfaceState = {};
|
||||
|
||||
Buffer::setSurfaceState(device.get(), &surfaceState, false, false, size, ptr, 0, nullptr, 0, 0);
|
||||
Buffer::setSurfaceState(device.get(), &surfaceState, false, false, size, ptr, 0, nullptr, 0, 0, false, 1u);
|
||||
|
||||
auto mocs = surfaceState.getMemoryObjectControlState();
|
||||
auto gmmHelper = device->getGmmHelper();
|
||||
@@ -1326,7 +1326,7 @@ HWTEST_F(BufferSetSurfaceTests, givenDebugVariableToDisableCachingForStatefulBuf
|
||||
using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE;
|
||||
RENDER_SURFACE_STATE surfaceState = {};
|
||||
|
||||
Buffer::setSurfaceState(device.get(), &surfaceState, false, false, size, ptr, 0, nullptr, 0, 0);
|
||||
Buffer::setSurfaceState(device.get(), &surfaceState, false, false, size, ptr, 0, nullptr, 0, 0, false, 1u);
|
||||
|
||||
auto mocs = surfaceState.getMemoryObjectControlState();
|
||||
auto gmmHelper = device->getGmmHelper();
|
||||
@@ -1346,7 +1346,7 @@ HWTEST_F(BufferSetSurfaceTests, givenBufferSetSurfaceThatMemoryPtrIsUnalignedToC
|
||||
using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE;
|
||||
RENDER_SURFACE_STATE surfaceState = {};
|
||||
|
||||
Buffer::setSurfaceState(device.get(), &surfaceState, false, false, size, offsetedPtr, 0, nullptr, 0, 0);
|
||||
Buffer::setSurfaceState(device.get(), &surfaceState, false, false, size, offsetedPtr, 0, nullptr, 0, 0, false, 1u);
|
||||
|
||||
auto mocs = surfaceState.getMemoryObjectControlState();
|
||||
auto gmmHelper = device->getGmmHelper();
|
||||
@@ -1365,7 +1365,7 @@ HWTEST_F(BufferSetSurfaceTests, givenBufferSetSurfaceThatMemorySizeIsUnalignedTo
|
||||
using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE;
|
||||
RENDER_SURFACE_STATE surfaceState = {};
|
||||
|
||||
Buffer::setSurfaceState(device.get(), &surfaceState, false, false, offsetedSize, ptr, 0, nullptr, 0, 0);
|
||||
Buffer::setSurfaceState(device.get(), &surfaceState, false, false, offsetedSize, ptr, 0, nullptr, 0, 0, false, 1u);
|
||||
|
||||
auto mocs = surfaceState.getMemoryObjectControlState();
|
||||
auto gmmHelper = device->getGmmHelper();
|
||||
@@ -1384,7 +1384,7 @@ HWTEST_F(BufferSetSurfaceTests, givenBufferSetSurfaceThatMemoryIsUnalignedToCach
|
||||
using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE;
|
||||
RENDER_SURFACE_STATE surfaceState = {};
|
||||
|
||||
Buffer::setSurfaceState(device.get(), &surfaceState, false, false, offsetedSize, ptr, 0, nullptr, CL_MEM_READ_ONLY, 0);
|
||||
Buffer::setSurfaceState(device.get(), &surfaceState, false, false, offsetedSize, ptr, 0, nullptr, CL_MEM_READ_ONLY, 0, false, 1u);
|
||||
|
||||
auto mocs = surfaceState.getMemoryObjectControlState();
|
||||
auto gmmHelper = device->getGmmHelper();
|
||||
@@ -1403,7 +1403,7 @@ HWTEST_F(BufferSetSurfaceTests, givenBufferSetSurfaceThatMemorySizeIsUnalignedTh
|
||||
using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE;
|
||||
RENDER_SURFACE_STATE surfaceState = {};
|
||||
|
||||
Buffer::setSurfaceState(device.get(), &surfaceState, false, false, offsetedSize, ptr, 0, nullptr, 0, 0);
|
||||
Buffer::setSurfaceState(device.get(), &surfaceState, false, false, offsetedSize, ptr, 0, nullptr, 0, 0, false, 1u);
|
||||
|
||||
auto width = surfaceState.getWidth();
|
||||
EXPECT_EQ(alignUp(width, 4), width);
|
||||
@@ -1421,7 +1421,7 @@ HWTEST_F(BufferSetSurfaceTests, givenBufferSetSurfaceWhenOffsetIsSpecifiedForSvm
|
||||
using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE;
|
||||
RENDER_SURFACE_STATE surfaceState = {};
|
||||
|
||||
Buffer::setSurfaceState(device.get(), &surfaceState, false, false, size, ptr, offset, &svmAlloc, 0, 0);
|
||||
Buffer::setSurfaceState(device.get(), &surfaceState, false, false, size, ptr, offset, &svmAlloc, 0, 0, false, 1u);
|
||||
|
||||
auto baseAddress = surfaceState.getSurfaceBaseAddress();
|
||||
EXPECT_EQ(svmAlloc.getGpuAddress() + offset, baseAddress);
|
||||
@@ -1437,7 +1437,7 @@ HWTEST_F(BufferSetSurfaceTests, givenBufferSetSurfaceThatMemoryPtrIsNotNullThenB
|
||||
using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE;
|
||||
RENDER_SURFACE_STATE surfaceState = {};
|
||||
|
||||
Buffer::setSurfaceState(device.get(), &surfaceState, false, false, size, ptr, 0, nullptr, 0, 0);
|
||||
Buffer::setSurfaceState(device.get(), &surfaceState, false, false, size, ptr, 0, nullptr, 0, 0, false, 1u);
|
||||
|
||||
auto surfType = surfaceState.getSurfaceType();
|
||||
EXPECT_EQ(RENDER_SURFACE_STATE::SURFACE_TYPE_SURFTYPE_BUFFER, surfType);
|
||||
@@ -1450,7 +1450,7 @@ HWTEST_F(BufferSetSurfaceTests, givenBufferSetSurfaceThatMemoryPtrIsNullThenNull
|
||||
using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE;
|
||||
RENDER_SURFACE_STATE surfaceState = {};
|
||||
|
||||
Buffer::setSurfaceState(device.get(), &surfaceState, false, false, 0, nullptr, 0, nullptr, 0, 0);
|
||||
Buffer::setSurfaceState(device.get(), &surfaceState, false, false, 0, nullptr, 0, nullptr, 0, 0, false, 1u);
|
||||
|
||||
auto surfType = surfaceState.getSurfaceType();
|
||||
EXPECT_EQ(RENDER_SURFACE_STATE::SURFACE_TYPE_SURFTYPE_NULL, surfType);
|
||||
@@ -1684,7 +1684,7 @@ HWTEST_F(BufferSetSurfaceTests, givenMisalignedPointerWhenSurfaceStateIsProgramm
|
||||
uintptr_t ptr = 0xfffff000;
|
||||
void *svmPtr = reinterpret_cast<void *>(ptr);
|
||||
|
||||
Buffer::setSurfaceState(device.get(), &surfaceState, false, false, 5, svmPtr, 0, nullptr, 0, 0);
|
||||
Buffer::setSurfaceState(device.get(), &surfaceState, false, false, 5, svmPtr, 0, nullptr, 0, 0, false, 1u);
|
||||
|
||||
EXPECT_EQ(castToUint64(svmPtr), surfaceState.getSurfaceBaseAddress());
|
||||
SURFACE_STATE_BUFFER_LENGTH length = {};
|
||||
@@ -1701,7 +1701,7 @@ HWTEST_F(BufferSetSurfaceTests, givenBufferThatIsMisalignedWhenSurfaceStateIsBei
|
||||
MockContext context;
|
||||
void *svmPtr = reinterpret_cast<void *>(0x1005);
|
||||
|
||||
Buffer::setSurfaceState(device.get(), &surfaceState, false, false, 5, svmPtr, 0, nullptr, 0, 0);
|
||||
Buffer::setSurfaceState(device.get(), &surfaceState, false, false, 5, svmPtr, 0, nullptr, 0, 0, false, 1u);
|
||||
|
||||
EXPECT_EQ(0u, surfaceState.getMemoryObjectControlState());
|
||||
}
|
||||
|
||||
@@ -59,7 +59,7 @@ class MockBuffer : public MockBufferStorage, public Buffer {
|
||||
}
|
||||
}
|
||||
void setArgStateful(void *memory, bool forceNonAuxMode, bool disableL3, bool alignSizeForAuxTranslation, bool isReadOnly, const Device &device, bool useGlobalAtomics, size_t numDevicesInContext) override {
|
||||
Buffer::setSurfaceState(this->device.get(), memory, forceNonAuxMode, disableL3, getSize(), getCpuAddress(), 0, (externalAlloc != nullptr) ? externalAlloc : &mockGfxAllocation, 0, 0);
|
||||
Buffer::setSurfaceState(this->device.get(), memory, forceNonAuxMode, disableL3, getSize(), getCpuAddress(), 0, (externalAlloc != nullptr) ? externalAlloc : &mockGfxAllocation, 0, 0, false, 1u);
|
||||
}
|
||||
GraphicsAllocation *externalAlloc = nullptr;
|
||||
};
|
||||
@@ -80,7 +80,7 @@ class AlignedBuffer : public MockBufferStorage, public Buffer {
|
||||
GraphicsAllocationHelper::toMultiGraphicsAllocation(gfxAllocation), true, false, false) {
|
||||
}
|
||||
void setArgStateful(void *memory, bool forceNonAuxMode, bool disableL3, bool alignSizeForAuxTranslation, bool isReadOnly, const Device &device, bool useGlobalAtomics, size_t numDevicesInContext) override {
|
||||
Buffer::setSurfaceState(this->device.get(), memory, forceNonAuxMode, disableL3, getSize(), getCpuAddress(), 0, &mockGfxAllocation, 0, 0);
|
||||
Buffer::setSurfaceState(this->device.get(), memory, forceNonAuxMode, disableL3, getSize(), getCpuAddress(), 0, &mockGfxAllocation, 0, 0, false, 1u);
|
||||
}
|
||||
};
|
||||
|
||||
@@ -100,7 +100,7 @@ class UnalignedBuffer : public MockBufferStorage, public Buffer {
|
||||
GraphicsAllocationHelper::toMultiGraphicsAllocation(gfxAllocation), false, false, false) {
|
||||
}
|
||||
void setArgStateful(void *memory, bool forceNonAuxMode, bool disableL3, bool alignSizeForAuxTranslation, bool isReadOnly, const Device &device, bool useGlobalAtomics, size_t numDevicesInContext) override {
|
||||
Buffer::setSurfaceState(this->device.get(), memory, forceNonAuxMode, disableL3, getSize(), getCpuAddress(), 0, &mockGfxAllocation, 0, 0);
|
||||
Buffer::setSurfaceState(this->device.get(), memory, forceNonAuxMode, disableL3, getSize(), getCpuAddress(), 0, &mockGfxAllocation, 0, 0, false, 1u);
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
@@ -332,6 +332,7 @@ void CommandStreamReceiver::initProgrammingFlags() {
|
||||
lastMediaSamplerConfig = -1;
|
||||
lastPreemptionMode = PreemptionMode::Initial;
|
||||
latestSentStatelessMocsConfig = 0;
|
||||
lastSentUseGlobalAtomics = false;
|
||||
}
|
||||
|
||||
void CommandStreamReceiver::programForAubSubCapture(bool wasActiveInPreviousEnqueue, bool isActive) {
|
||||
|
||||
@@ -345,6 +345,7 @@ class CommandStreamReceiver {
|
||||
bool useNewResourceImplicitFlush = false;
|
||||
bool newResources = false;
|
||||
bool useGpuIdleImplicitFlush = false;
|
||||
bool lastSentUseGlobalAtomics = false;
|
||||
};
|
||||
|
||||
typedef CommandStreamReceiver *(*CommandStreamReceiverCreateFunc)(bool withAubDump,
|
||||
|
||||
@@ -367,6 +367,11 @@ CompletionStamp CommandStreamReceiverHw<GfxFamily>::flushTask(
|
||||
latestSentStatelessMocsConfig = mocsIndex;
|
||||
}
|
||||
|
||||
if (dispatchFlags.useGlobalAtomics != lastSentUseGlobalAtomics) {
|
||||
isStateBaseAddressDirty = true;
|
||||
lastSentUseGlobalAtomics = dispatchFlags.useGlobalAtomics;
|
||||
}
|
||||
|
||||
bool sourceLevelDebuggerActive = device.getSourceLevelDebugger() != nullptr ? true : false;
|
||||
|
||||
auto memoryCompressionState = lastMemoryCompressionState;
|
||||
|
||||
Reference in New Issue
Block a user