Store surface state heap (SSH) per root device in Kernel

Related-To: NEO-5001
Signed-off-by: Mateusz Jablonski <mateusz.jablonski@intel.com>
This commit is contained in:
Mateusz Jablonski 2020-11-23 18:01:38 +00:00 committed by Compute-Runtime-Automation
parent 52d96af5f0
commit 7ec69c33f9
40 changed files with 231 additions and 220 deletions

View File

@ -530,7 +530,7 @@ bool CommandQueue::setupDebugSurface(Kernel *kernel) {
DEBUG_BREAK_IF(!kernel->requiresSshForBuffers());
auto surfaceState = ptrOffset(reinterpret_cast<uintptr_t *>(kernel->getSurfaceStateHeap()),
auto surfaceState = ptrOffset(reinterpret_cast<uintptr_t *>(kernel->getSurfaceStateHeap(device->getRootDeviceIndex())),
kernel->getKernelInfo().patchInfo.pAllocateSystemThreadSurface->Offset);
void *addressToPatch = reinterpret_cast<void *>(debugSurface->getGpuAddress());
size_t sizeToPatch = debugSurface->getUnderlyingBufferSize();

View File

@ -330,7 +330,7 @@ void CommandQueueHw<GfxFamily>::enqueueHandler(Surface **surfacesForResidency,
if (blockQueue) {
if (parentKernel) {
size_t minSizeSSHForEM = HardwareCommandsHelper<GfxFamily>::getSshSizeForExecutionModel(*parentKernel);
size_t minSizeSSHForEM = HardwareCommandsHelper<GfxFamily>::getSshSizeForExecutionModel(*parentKernel, device->getRootDeviceIndex());
blockedCommandsData->surfaceStateHeapSizeEM = minSizeSSHForEM;
}
@ -534,7 +534,8 @@ void CommandQueueHw<GfxFamily>::processDeviceEnqueue(DeviceQueueHw<GfxFamily> *d
TagNode<HwTimeStamps> *hwTimeStamps,
bool &blocking) {
auto parentKernel = multiDispatchInfo.peekParentKernel();
size_t minSizeSSHForEM = HardwareCommandsHelper<GfxFamily>::getSshSizeForExecutionModel(*parentKernel);
auto rootDeviceIndex = devQueueHw->getDevice().getRootDeviceIndex();
size_t minSizeSSHForEM = HardwareCommandsHelper<GfxFamily>::getSshSizeForExecutionModel(*parentKernel, rootDeviceIndex);
bool isCcsUsed = EngineHelpers::isCcs(gpgpuEngine->osContext->getEngineType());
uint32_t taskCount = getGpgpuCommandStreamReceiver().peekTaskCount() + 1;

View File

@ -200,7 +200,7 @@ IndirectHeap &getIndirectHeap(CommandQueue &commandQueue, const MultiDispatchInf
if (Kernel *parentKernel = multiDispatchInfo.peekParentKernel()) {
if (heapType == IndirectHeap::SURFACE_STATE) {
expectedSize += HardwareCommandsHelper<GfxFamily>::getSshSizeForExecutionModel(*parentKernel);
expectedSize += HardwareCommandsHelper<GfxFamily>::getSshSizeForExecutionModel(*parentKernel, commandQueue.getDevice().getRootDeviceIndex());
} else //if (heapType == IndirectHeap::DYNAMIC_STATE || heapType == IndirectHeap::INDIRECT_OBJECT)
{
DeviceQueueHw<GfxFamily> *pDevQueue = castToObject<DeviceQueueHw<GfxFamily>>(commandQueue.getContext().getDefaultDeviceQueue());

View File

@ -248,6 +248,7 @@ template <typename GfxFamily>
void HardwareInterface<GfxFamily>::obtainIndirectHeaps(CommandQueue &commandQueue, const MultiDispatchInfo &multiDispatchInfo,
bool blockedQueue, IndirectHeap *&dsh, IndirectHeap *&ioh, IndirectHeap *&ssh) {
auto parentKernel = multiDispatchInfo.peekParentKernel();
auto rootDeviceIndex = commandQueue.getDevice().getRootDeviceIndex();
if (blockedQueue) {
size_t dshSize = 0;
@ -257,7 +258,7 @@ void HardwareInterface<GfxFamily>::obtainIndirectHeaps(CommandQueue &commandQueu
if (parentKernel) {
dshSize = commandQueue.getContext().getDefaultDeviceQueue()->getDshBuffer()->getUnderlyingBufferSize();
sshSize += HardwareCommandsHelper<GfxFamily>::getSshSizeForExecutionModel(*parentKernel);
sshSize += HardwareCommandsHelper<GfxFamily>::getSshSizeForExecutionModel(*parentKernel, rootDeviceIndex);
iohEqualsDsh = true;
colorCalcSize = static_cast<size_t>(commandQueue.getContext().getDefaultDeviceQueue()->colorCalcStateSize);
} else {

View File

@ -66,9 +66,10 @@ void gtpinNotifyKernelCreate(cl_kernel kernel) {
size_t gtpinBTI = pKernel->getNumberOfBindingTableStates();
// Enlarge local copy of SSH by 1 SS
auto device = pKernel->getDevices()[0];
auto rootDeviceIndex = device->getRootDeviceIndex();
GFXCORE_FAMILY genFamily = device->getHardwareInfo().platform.eRenderCoreFamily;
GTPinHwHelper &gtpinHelper = GTPinHwHelper::get(genFamily);
if (pKernel->isParentKernel || !gtpinHelper.addSurfaceState(pKernel)) {
if (pKernel->isParentKernel || !gtpinHelper.addSurfaceState(pKernel, rootDeviceIndex)) {
// Kernel with no SSH or Kernel EM, not supported
return;
}
@ -103,8 +104,10 @@ void gtpinNotifyKernelCreate(cl_kernel kernel) {
void gtpinNotifyKernelSubmit(cl_kernel kernel, void *pCmdQueue) {
if (isGTPinInitialized) {
auto pCmdQ = reinterpret_cast<CommandQueue *>(pCmdQueue);
auto &device = pCmdQ->getDevice();
auto pKernel = castToObjectOrAbort<Kernel>(kernel);
if (pKernel->isParentKernel || pKernel->getSurfaceStateHeapSize() == 0) {
if (pKernel->isParentKernel || pKernel->getSurfaceStateHeapSize(device.getRootDeviceIndex()) == 0) {
// Kernel with no SSH, not supported
return;
}
@ -132,14 +135,13 @@ void gtpinNotifyKernelSubmit(cl_kernel kernel, void *pCmdQueue) {
if (!resource) {
return;
}
auto &device = *pKernel->getDevices()[0];
GFXCORE_FAMILY genFamily = device.getHardwareInfo().platform.eRenderCoreFamily;
GTPinHwHelper &gtpinHelper = GTPinHwHelper::get(genFamily);
size_t gtpinBTI = pKernel->getNumberOfBindingTableStates() - 1;
void *pSurfaceState = gtpinHelper.getSurfaceState(pKernel, gtpinBTI);
void *pSurfaceState = gtpinHelper.getSurfaceState(pKernel, gtpinBTI, device.getRootDeviceIndex());
cl_mem buffer = (cl_mem)resource;
auto pBuffer = castToObjectOrAbort<Buffer>(buffer);
pBuffer->setArgStateful(pSurfaceState, false, false, false, false, device.getDevice());
pBuffer->setArgStateful(pSurfaceState, false, false, false, false, device);
}
}

View File

@ -15,8 +15,8 @@ class GTPinHwHelper {
public:
static GTPinHwHelper &get(GFXCORE_FAMILY gfxCore);
virtual uint32_t getGenVersion() = 0;
virtual bool addSurfaceState(Kernel *pKernel) = 0;
virtual void *getSurfaceState(Kernel *pKernel, size_t bti) = 0;
virtual bool addSurfaceState(Kernel *pKernel, uint32_t rootDeviceIndex) = 0;
virtual void *getSurfaceState(Kernel *pKernel, size_t bti, uint32_t rootDeviceIndex) = 0;
protected:
GTPinHwHelper(){};
@ -30,8 +30,8 @@ class GTPinHwHelperHw : public GTPinHwHelper {
return gtpinHwHelper;
}
uint32_t getGenVersion() override;
bool addSurfaceState(Kernel *pKernel) override;
void *getSurfaceState(Kernel *pKernel, size_t bti) override;
bool addSurfaceState(Kernel *pKernel, uint32_t rootDeviceIndex) override;
void *getSurfaceState(Kernel *pKernel, size_t bti, uint32_t rootDeviceIndex) override;
private:
GTPinHwHelperHw(){};

View File

@ -15,11 +15,11 @@
namespace NEO {
template <typename GfxFamily>
bool GTPinHwHelperHw<GfxFamily>::addSurfaceState(Kernel *pKernel) {
bool GTPinHwHelperHw<GfxFamily>::addSurfaceState(Kernel *pKernel, uint32_t rootDeviceIndex) {
using RENDER_SURFACE_STATE = typename GfxFamily::RENDER_SURFACE_STATE;
using BINDING_TABLE_STATE = typename GfxFamily::BINDING_TABLE_STATE;
size_t sshSize = pKernel->getSurfaceStateHeapSize();
size_t sshSize = pKernel->getSurfaceStateHeapSize(rootDeviceIndex);
if ((sshSize == 0) || pKernel->isParentKernel) {
// Kernels which do not use SSH or use Execution Model are not supported (yet)
return false;
@ -29,7 +29,7 @@ bool GTPinHwHelperHw<GfxFamily>::addSurfaceState(Kernel *pKernel) {
size_t sizeToEnlarge = ssSize + btsSize;
size_t currBTOffset = pKernel->getBindingTableOffset();
size_t currSurfaceStateSize = currBTOffset;
char *pSsh = static_cast<char *>(pKernel->getSurfaceStateHeap());
char *pSsh = static_cast<char *>(pKernel->getSurfaceStateHeap(rootDeviceIndex));
char *pNewSsh = new char[sshSize + sizeToEnlarge];
memcpy_s(pNewSsh, sshSize + sizeToEnlarge, pSsh, currSurfaceStateSize);
RENDER_SURFACE_STATE *pSS = reinterpret_cast<RENDER_SURFACE_STATE *>(pNewSsh + currSurfaceStateSize);
@ -40,19 +40,19 @@ bool GTPinHwHelperHw<GfxFamily>::addSurfaceState(Kernel *pKernel) {
BINDING_TABLE_STATE *pNewBTS = reinterpret_cast<BINDING_TABLE_STATE *>(pNewSsh + newSurfaceStateSize + currBTCount * btsSize);
*pNewBTS = GfxFamily::cmdInitBindingTableState;
pNewBTS->setSurfaceStatePointer((uint64_t)currBTOffset);
pKernel->resizeSurfaceStateHeap(pNewSsh, sshSize + sizeToEnlarge, currBTCount + 1, newSurfaceStateSize);
pKernel->resizeSurfaceStateHeap(rootDeviceIndex, pNewSsh, sshSize + sizeToEnlarge, currBTCount + 1, newSurfaceStateSize);
return true;
}
// Returns a pointer to the surface state referenced by binding-table entry `bti`
// of `pKernel`, located inside the kernel's surface state heap (SSH).
// Returns nullptr when the kernel's SSH pointer is null or when `bti` is not
// smaller than pKernel->getNumberOfBindingTableStates().
// NOTE(review): this span is a rendered diff hunk; each pair of near-identical
// lines appears to be the pre-change line followed by its replacement that
// passes rootDeviceIndex - confirm against the hunk header before editing.
template <typename GfxFamily>
void *GTPinHwHelperHw<GfxFamily>::getSurfaceState(Kernel *pKernel, size_t bti) {
void *GTPinHwHelperHw<GfxFamily>::getSurfaceState(Kernel *pKernel, size_t bti, uint32_t rootDeviceIndex) {
using BINDING_TABLE_STATE = typename GfxFamily::BINDING_TABLE_STATE;
if ((nullptr == pKernel->getSurfaceStateHeap()) || (bti >= pKernel->getNumberOfBindingTableStates())) {
if ((nullptr == pKernel->getSurfaceStateHeap(rootDeviceIndex)) || (bti >= pKernel->getNumberOfBindingTableStates())) {
return nullptr;
}
// The binding table sits inside the SSH at getBindingTableOffset(); pick entry `bti`,
// then use its surface-state pointer as an offset from the SSH base.
auto *pBts = reinterpret_cast<BINDING_TABLE_STATE *>(ptrOffset(pKernel->getSurfaceStateHeap(), (pKernel->getBindingTableOffset() + bti * sizeof(BINDING_TABLE_STATE))));
auto pSurfaceState = ptrOffset(pKernel->getSurfaceStateHeap(), pBts->getSurfaceStatePointer());
auto *pBts = reinterpret_cast<BINDING_TABLE_STATE *>(ptrOffset(pKernel->getSurfaceStateHeap(rootDeviceIndex), (pKernel->getBindingTableOffset() + bti * sizeof(BINDING_TABLE_STATE))));
auto pSurfaceState = ptrOffset(pKernel->getSurfaceStateHeap(rootDeviceIndex), pBts->getSurfaceStatePointer());
return pSurfaceState;
}

View File

@ -126,7 +126,7 @@ struct HardwareCommandsHelper : public PerThreadDataHelper {
const Kernel &kernel,
size_t localWorkSize = 256);
static size_t getSizeRequiredSSH(
const Kernel &kernel);
const Kernel &kernel, uint32_t rootDeviceIndex);
static size_t getTotalSizeRequiredDSH(
const MultiDispatchInfo &multiDispatchInfo);
@ -135,7 +135,7 @@ struct HardwareCommandsHelper : public PerThreadDataHelper {
static size_t getTotalSizeRequiredSSH(
const MultiDispatchInfo &multiDispatchInfo);
static size_t getSshSizeForExecutionModel(const Kernel &kernel);
static size_t getSshSizeForExecutionModel(const Kernel &kernel, uint32_t rootDeviceIndex);
static void setInterfaceDescriptorOffset(
WALKER_TYPE<GfxFamily> *walkerCmd,
uint32_t &interfaceDescriptorIndex);

View File

@ -75,9 +75,9 @@ size_t HardwareCommandsHelper<GfxFamily>::getSizeRequiredIOH(
// Computes the SSH space to reserve for `kernel`: the kernel's surface state heap
// size plus BINDING_TABLE_STATE::SURFACESTATEPOINTER_ALIGN_SIZE of padding.
// A kernel whose SSH size is 0 gets no padding, so the result is 0.
// NOTE(review): this span is a rendered diff hunk; each pair of near-identical
// lines appears to be the pre-change line followed by its replacement that
// takes rootDeviceIndex - confirm against the hunk header before editing.
template <typename GfxFamily>
size_t HardwareCommandsHelper<GfxFamily>::getSizeRequiredSSH(
const Kernel &kernel) {
const Kernel &kernel, uint32_t rootDeviceIndex) {
typedef typename GfxFamily::BINDING_TABLE_STATE BINDING_TABLE_STATE;
auto sizeSSH = kernel.getSurfaceStateHeapSize();
auto sizeSSH = kernel.getSurfaceStateHeapSize(rootDeviceIndex);
// Add alignment slack only when there is an SSH to place at all.
sizeSSH += sizeSSH ? BINDING_TABLE_STATE::SURFACESTATEPOINTER_ALIGN_SIZE : 0;
return sizeSSH;
}
@ -112,11 +112,11 @@ size_t HardwareCommandsHelper<GfxFamily>::getTotalSizeRequiredIOH(
// Returns the result of getSizeRequired() over `multiDispatchInfo`, using a
// per-dispatch callback that evaluates getSizeRequiredSSH() for the dispatch's kernel.
// In the two-argument form the root device index is taken from the dispatch's
// own ClDevice (dispatchInfo.getClDevice().getRootDeviceIndex()).
// NOTE(review): getSizeRequired is not visible here; presumably it accumulates the
// per-dispatch values - confirm. The two `return` lines are the pre- and
// post-change variants of the same statement in this rendered diff hunk.
template <typename GfxFamily>
size_t HardwareCommandsHelper<GfxFamily>::getTotalSizeRequiredSSH(
const MultiDispatchInfo &multiDispatchInfo) {
return getSizeRequired(multiDispatchInfo, [](const DispatchInfo &dispatchInfo) { return getSizeRequiredSSH(*dispatchInfo.getKernel()); });
return getSizeRequired(multiDispatchInfo, [](const DispatchInfo &dispatchInfo) { return getSizeRequiredSSH(*dispatchInfo.getKernel(), dispatchInfo.getClDevice().getRootDeviceIndex()); });
}
template <typename GfxFamily>
size_t HardwareCommandsHelper<GfxFamily>::getSshSizeForExecutionModel(const Kernel &kernel) {
size_t HardwareCommandsHelper<GfxFamily>::getSshSizeForExecutionModel(const Kernel &kernel, uint32_t rootDeviceIndex) {
typedef typename GfxFamily::BINDING_TABLE_STATE BINDING_TABLE_STATE;
size_t totalSize = 0;
@ -136,7 +136,7 @@ size_t HardwareCommandsHelper<GfxFamily>::getSshSizeForExecutionModel(const Kern
SchedulerKernel &scheduler = kernel.getContext().getSchedulerKernel();
totalSize += getSizeRequiredSSH(scheduler);
totalSize += getSizeRequiredSSH(scheduler, rootDeviceIndex);
totalSize += maxBindingTableCount * sizeof(BINDING_TABLE_STATE) * DeviceQueue::interfaceDescriptorEntries;
totalSize = alignUp(totalSize, BINDING_TABLE_STATE::SURFACESTATEPOINTER_ALIGN_SIZE);
@ -237,7 +237,7 @@ size_t HardwareCommandsHelper<GfxFamily>::sendIndirectState(
kernel.patchBindlessSurfaceStateOffsets(device, ssh.getUsed());
auto dstBindingTablePointer = EncodeSurfaceState<GfxFamily>::pushBindingTableAndSurfaceStates(ssh, (kernelInfo.patchInfo.bindingTableState != nullptr) ? kernelInfo.patchInfo.bindingTableState->Count : 0,
kernel.getSurfaceStateHeap(), kernel.getSurfaceStateHeapSize(),
kernel.getSurfaceStateHeap(rootDeviceIndex), kernel.getSurfaceStateHeapSize(rootDeviceIndex),
kernel.getNumberOfBindingTableStates(), kernel.getBindingTableOffset());
// Copy our sampler state if it exists

View File

@ -140,7 +140,7 @@ void Kernel::patchWithImplicitSurface(void *ptrToPatchInCrossThreadData, Graphic
uint32_t sshOffset = patch.SurfaceStateHeapOffset;
auto rootDeviceIndex = allocation.getRootDeviceIndex();
void *crossThreadData = getCrossThreadData(rootDeviceIndex);
void *ssh = getSurfaceStateHeap();
void *ssh = getSurfaceStateHeap(rootDeviceIndex);
if (crossThreadData != nullptr) {
auto pp = ptrOffset(crossThreadData, crossThreadDataOffset);
uintptr_t addressToPatch = reinterpret_cast<uintptr_t>(ptrToPatchInCrossThreadData);
@ -235,13 +235,14 @@ cl_int Kernel::initialize() {
}
// allocate our own SSH, if necessary
sshLocalSize = heapInfo.SurfaceStateHeapSize;
kernelDeviceInfos[rootDeviceIndex].sshLocalSize = heapInfo.SurfaceStateHeapSize;
if (sshLocalSize) {
pSshLocal = std::make_unique<char[]>(sshLocalSize);
if (kernelDeviceInfos[rootDeviceIndex].sshLocalSize) {
kernelDeviceInfos[rootDeviceIndex].pSshLocal = std::make_unique<char[]>(kernelDeviceInfos[rootDeviceIndex].sshLocalSize);
// copy the ssh into our local copy
memcpy_s(pSshLocal.get(), sshLocalSize, heapInfo.pSsh, sshLocalSize);
memcpy_s(kernelDeviceInfos[rootDeviceIndex].pSshLocal.get(), kernelDeviceInfos[rootDeviceIndex].sshLocalSize,
heapInfo.pSsh, kernelDeviceInfos[rootDeviceIndex].sshLocalSize);
}
numberOfBindingTableStates = (patchInfo.bindingTableState != nullptr) ? patchInfo.bindingTableState->Count : 0;
localBindingTableOffset = (patchInfo.bindingTableState != nullptr) ? patchInfo.bindingTableState->Offset : 0;
@ -287,7 +288,7 @@ cl_int Kernel::initialize() {
if (patchInfo.pAllocateStatelessEventPoolSurface) {
if (requiresSshForBuffers()) {
auto surfaceState = ptrOffset(reinterpret_cast<uintptr_t *>(getSurfaceStateHeap()),
auto surfaceState = ptrOffset(reinterpret_cast<uintptr_t *>(getSurfaceStateHeap(rootDeviceIndex)),
patchInfo.pAllocateStatelessEventPoolSurface->SurfaceStateHeapOffset);
Buffer::setSurfaceState(&getDevice().getDevice(), surfaceState, 0, nullptr, 0, nullptr, 0, 0);
}
@ -296,7 +297,7 @@ cl_int Kernel::initialize() {
if (patchInfo.pAllocateStatelessDefaultDeviceQueueSurface) {
if (requiresSshForBuffers()) {
auto surfaceState = ptrOffset(reinterpret_cast<uintptr_t *>(getSurfaceStateHeap()),
auto surfaceState = ptrOffset(reinterpret_cast<uintptr_t *>(getSurfaceStateHeap(rootDeviceIndex)),
patchInfo.pAllocateStatelessDefaultDeviceQueueSurface->SurfaceStateHeapOffset);
Buffer::setSurfaceState(&getDevice().getDevice(), surfaceState, 0, nullptr, 0, nullptr, 0, 0);
}
@ -783,8 +784,8 @@ void Kernel::setStartOffset(uint32_t offset) {
this->startOffset = offset;
}
// Returns the kernel's local surface state heap (SSH) buffer, or nullptr when
// kernelInfo.usesSsh is false. The rootDeviceIndex variant reads the buffer from
// kernelDeviceInfos[rootDeviceIndex]; the index is not range-checked here.
// NOTE(review): this span is a rendered diff hunk; the paired signature/return
// lines appear to be the pre-change version followed by its replacement.
void *Kernel::getSurfaceStateHeap() const {
return kernelInfo.usesSsh ? pSshLocal.get() : nullptr;
void *Kernel::getSurfaceStateHeap(uint32_t rootDeviceIndex) const {
return kernelInfo.usesSsh ? kernelDeviceInfos[rootDeviceIndex].pSshLocal.get() : nullptr;
}
size_t Kernel::getDynamicStateHeapSize() const {
@ -795,9 +796,9 @@ const void *Kernel::getDynamicStateHeap() const {
return kernelInfo.heapInfo.pDsh;
}
// Returns the recorded size of the kernel's local surface state heap (SSH), or 0
// when kernelInfo.usesSsh is false. The rootDeviceIndex variant reads the size
// from kernelDeviceInfos[rootDeviceIndex]; the index is not range-checked here.
// NOTE(review): this span is a rendered diff hunk; the paired signature and
// `? ...` lines appear to be the pre-change version followed by its replacement.
size_t Kernel::getSurfaceStateHeapSize() const {
size_t Kernel::getSurfaceStateHeapSize(uint32_t rootDeviceIndex) const {
return kernelInfo.usesSsh
? sshLocalSize
? kernelDeviceInfos[rootDeviceIndex].sshLocalSize
: 0;
}
@ -805,9 +806,9 @@ size_t Kernel::getNumberOfBindingTableStates() const {
return numberOfBindingTableStates;
}
// Replaces the kernel's local surface state heap (SSH) with `pNewSsh` and records
// the new heap size, binding-table count and binding-table offset.
// Ownership of `pNewSsh` passes to the pSshLocal smart pointer via reset(), which
// also releases the previously held buffer.
// `newSshSize` is narrowed to uint32_t when stored.
// In the rootDeviceIndex variant only the heap pointer and size are stored per root
// device; numberOfBindingTableStates and localBindingTableOffset are still written
// to the kernel-wide members.
// NOTE(review): this span is a rendered diff hunk; the paired lines appear to be
// the pre-change version followed by its replacement.
void Kernel::resizeSurfaceStateHeap(void *pNewSsh, size_t newSshSize, size_t newBindingTableCount, size_t newBindingTableOffset) {
pSshLocal.reset(static_cast<char *>(pNewSsh));
sshLocalSize = static_cast<uint32_t>(newSshSize);
void Kernel::resizeSurfaceStateHeap(uint32_t rootDeviceIndex, void *pNewSsh, size_t newSshSize, size_t newBindingTableCount, size_t newBindingTableOffset) {
kernelDeviceInfos[rootDeviceIndex].pSshLocal.reset(static_cast<char *>(pNewSsh));
kernelDeviceInfos[rootDeviceIndex].sshLocalSize = static_cast<uint32_t>(newSshSize);
numberOfBindingTableStates = newBindingTableCount;
localBindingTableOffset = newBindingTableOffset;
}
@ -882,7 +883,7 @@ cl_int Kernel::setArgSvm(uint32_t argIndex, size_t svmAllocSize, void *svmPtr, G
if (requiresSshForBuffers()) {
const auto &kernelArgInfo = kernelInfo.kernelArgInfo[argIndex];
auto surfaceState = ptrOffset(getSurfaceStateHeap(), kernelArgInfo.offsetHeap);
auto surfaceState = ptrOffset(getSurfaceStateHeap(rootDeviceIndex), kernelArgInfo.offsetHeap);
Buffer::setSurfaceState(&getDevice().getDevice(), surfaceState, svmAllocSize + ptrDiff(svmPtr, ptrToPatch), ptrToPatch, 0, svmAlloc, svmFlags, 0);
}
if (!kernelArguments[argIndex].isPatched) {
@ -913,7 +914,7 @@ cl_int Kernel::setArgSvmAlloc(uint32_t argIndex, void *svmPtr, GraphicsAllocatio
if (requiresSshForBuffers()) {
const auto &kernelArgInfo = kernelInfo.kernelArgInfo[argIndex];
auto surfaceState = ptrOffset(getSurfaceStateHeap(), kernelArgInfo.offsetHeap);
auto surfaceState = ptrOffset(getSurfaceStateHeap(rootDeviceIndex), kernelArgInfo.offsetHeap);
size_t allocSize = 0;
size_t offset = 0;
if (svmAlloc != nullptr) {
@ -1317,7 +1318,7 @@ cl_int Kernel::setArgBuffer(uint32_t argIndex,
}
if (requiresSshForBuffers()) {
auto surfaceState = ptrOffset(getSurfaceStateHeap(), kernelArgInfo.offsetHeap);
auto surfaceState = ptrOffset(getSurfaceStateHeap(rootDeviceIndex), kernelArgInfo.offsetHeap);
buffer->setArgStateful(surfaceState, forceNonAuxMode, disableL3, isAuxTranslationKernel, kernelArgInfo.isReadOnly, getDevice().getDevice());
}
@ -1342,7 +1343,7 @@ cl_int Kernel::setArgBuffer(uint32_t argIndex,
storeKernelArg(argIndex, BUFFER_OBJ, nullptr, argVal, argSize);
if (requiresSshForBuffers()) {
auto surfaceState = ptrOffset(getSurfaceStateHeap(), kernelArgInfo.offsetHeap);
auto surfaceState = ptrOffset(getSurfaceStateHeap(rootDeviceIndex), kernelArgInfo.offsetHeap);
Buffer::setSurfaceState(&getDevice().getDevice(), surfaceState, 0, nullptr, 0, nullptr, 0, 0);
}
@ -1391,7 +1392,7 @@ cl_int Kernel::setArgPipe(uint32_t argIndex,
auto graphicsAllocation = pipe->getGraphicsAllocation(getDevice().getRootDeviceIndex());
if (requiresSshForBuffers()) {
auto surfaceState = ptrOffset(getSurfaceStateHeap(), kernelArgInfo.offsetHeap);
auto surfaceState = ptrOffset(getSurfaceStateHeap(rootDeviceIndex), kernelArgInfo.offsetHeap);
Buffer::setSurfaceState(&getDevice().getDevice(), surfaceState,
pipe->getSize(), pipe->getCpuAddress(), 0,
graphicsAllocation, 0, 0);
@ -1429,7 +1430,7 @@ cl_int Kernel::setArgImageWithMipLevel(uint32_t argIndex,
storeKernelArg(argIndex, IMAGE_OBJ, clMemObj, argVal, argSize);
auto surfaceState = ptrOffset(getSurfaceStateHeap(), kernelArgInfo.offsetHeap);
auto surfaceState = ptrOffset(getSurfaceStateHeap(rootDeviceIndex), kernelArgInfo.offsetHeap);
DEBUG_BREAK_IF(!kernelArgInfo.isImage);
// Sets SS structure
@ -2250,7 +2251,7 @@ void Kernel::patchDefaultDeviceQueue(DeviceQueue *devQueue) {
static_cast<uintptr_t>(devQueue->getQueueBuffer()->getGpuAddressToPatch()));
}
if (requiresSshForBuffers()) {
auto surfaceState = ptrOffset(reinterpret_cast<uintptr_t *>(getSurfaceStateHeap()),
auto surfaceState = ptrOffset(reinterpret_cast<uintptr_t *>(getSurfaceStateHeap(rootDeviceIndex)),
patchInfo.pAllocateStatelessDefaultDeviceQueueSurface->SurfaceStateHeapOffset);
Buffer::setSurfaceState(&getDevice().getDevice(), surfaceState, devQueue->getQueueBuffer()->getUnderlyingBufferSize(),
(void *)devQueue->getQueueBuffer()->getGpuAddress(), 0, devQueue->getQueueBuffer(), 0, 0);
@ -2272,7 +2273,7 @@ void Kernel::patchEventPool(DeviceQueue *devQueue) {
}
if (requiresSshForBuffers()) {
auto surfaceState = ptrOffset(reinterpret_cast<uintptr_t *>(getSurfaceStateHeap()),
auto surfaceState = ptrOffset(reinterpret_cast<uintptr_t *>(getSurfaceStateHeap(rootDeviceIndex)),
patchInfo.pAllocateStatelessEventPoolSurface->SurfaceStateHeapOffset);
Buffer::setSurfaceState(&getDevice().getDevice(), surfaceState, devQueue->getEventPoolBuffer()->getUnderlyingBufferSize(),
(void *)devQueue->getEventPoolBuffer()->getGpuAddress(), 0, devQueue->getEventPoolBuffer(), 0, 0);
@ -2298,13 +2299,14 @@ bool Kernel::usesSyncBuffer() {
}
void Kernel::patchSyncBuffer(Device &device, GraphicsAllocation *gfxAllocation, size_t bufferOffset) {
auto rootDeviceIndex = device.getRootDeviceIndex();
auto &patchInfo = kernelInfo.patchInfo;
auto bufferPatchAddress = ptrOffset(getCrossThreadData(device.getRootDeviceIndex()), patchInfo.pAllocateSyncBuffer->DataParamOffset);
auto bufferPatchAddress = ptrOffset(getCrossThreadData(rootDeviceIndex), patchInfo.pAllocateSyncBuffer->DataParamOffset);
patchWithRequiredSize(bufferPatchAddress, patchInfo.pAllocateSyncBuffer->DataParamSize,
ptrOffset(gfxAllocation->getGpuAddressToPatch(), bufferOffset));
if (requiresSshForBuffers()) {
auto surfaceState = ptrOffset(reinterpret_cast<uintptr_t *>(getSurfaceStateHeap()),
auto surfaceState = ptrOffset(reinterpret_cast<uintptr_t *>(getSurfaceStateHeap(rootDeviceIndex)),
patchInfo.pAllocateSyncBuffer->SurfaceStateHeapOffset);
auto addressToPatch = gfxAllocation->getUnderlyingBuffer();
auto sizeToPatch = gfxAllocation->getUnderlyingBufferSize();
@ -2353,10 +2355,11 @@ void Kernel::resolveArgs() {
}
}
}
auto rootDeviceIndex = getDevice().getRootDeviceIndex();
if (canTransformImageTo2dArray) {
imageTransformer->transformImagesTo2dArray(kernelInfo, kernelArguments, getSurfaceStateHeap());
imageTransformer->transformImagesTo2dArray(kernelInfo, kernelArguments, getSurfaceStateHeap(rootDeviceIndex));
} else if (imageTransformer->didTransform()) {
imageTransformer->transformImagesTo3d(kernelInfo, kernelArguments, getSurfaceStateHeap());
imageTransformer->transformImagesTo3d(kernelInfo, kernelArguments, getSurfaceStateHeap(rootDeviceIndex));
}
}

View File

@ -153,18 +153,18 @@ class Kernel : public BaseObject<_cl_kernel> {
size_t *paramValueSizeRet) const;
const void *getKernelHeap() const;
void *getSurfaceStateHeap() const;
void *getSurfaceStateHeap(uint32_t rootDeviceIndex) const;
const void *getDynamicStateHeap() const;
size_t getKernelHeapSize() const;
size_t getSurfaceStateHeapSize() const;
size_t getSurfaceStateHeapSize(uint32_t rootDeviceIndex) const;
size_t getDynamicStateHeapSize() const;
size_t getNumberOfBindingTableStates() const;
size_t getBindingTableOffset() const {
return localBindingTableOffset;
}
void resizeSurfaceStateHeap(void *pNewSsh, size_t newSshSize, size_t newBindingTableCount, size_t newBindingTableOffset);
void resizeSurfaceStateHeap(uint32_t rootDeviceIndex, void *pNewSsh, size_t newSshSize, size_t newBindingTableCount, size_t newBindingTableOffset);
void substituteKernelHeap(void *newKernelHeap, size_t newKernelHeapSize);
bool isKernelHeapSubstituted() const;
@ -524,8 +524,6 @@ class Kernel : public BaseObject<_cl_kernel> {
size_t numberOfBindingTableStates = 0u;
size_t localBindingTableOffset = 0u;
std::unique_ptr<char[]> pSshLocal;
uint32_t sshLocalSize = 0u;
GraphicsAllocation *kernelReflectionSurface = nullptr;
@ -550,13 +548,15 @@ class Kernel : public BaseObject<_cl_kernel> {
bool debugEnabled = false;
uint32_t additionalKernelExecInfo = AdditionalKernelExecInfo::NotSet;
// Kernel state kept in the kernelDeviceInfos container, one entry per element.
// NOTE(review): the two `struct KernelDeviceInfo` header lines are the pre- and
// post-change variants in this rendered diff hunk (the latter adds the
// NonCopyableClass base); only one of them exists in the real header.
struct KernelDeviceInfo {
struct KernelDeviceInfo : public NonCopyableClass {
// Owned buffer for the surface state heap (SSH).
std::unique_ptr<char[]> pSshLocal;
// Size recorded for pSshLocal; presumably a byte count - confirm.
uint32_t sshLocalSize = 0u;
// Cross-thread data buffer and its size; raw pointer, ownership not visible in this struct.
char *crossThreadData = nullptr;
uint32_t crossThreadDataSize = 0u;
// Private surface allocation and its size; raw pointer, ownership not visible in this struct.
GraphicsAllocation *privateSurface = nullptr;
uint64_t privateSurfaceSize = 0u;
};
StackVec<KernelDeviceInfo, 1> kernelDeviceInfos;
std::vector<KernelDeviceInfo> kernelDeviceInfos;
};
} // namespace NEO

View File

@ -62,7 +62,7 @@ void PrintfHandler::prepareDispatch(const MultiDispatchInfo &multiDispatchInfo)
patchWithRequiredSize(printfPatchAddress, kernel->getKernelInfo().patchInfo.pAllocateStatelessPrintfSurface->DataParamSize, (uintptr_t)printfSurface->getGpuAddressToPatch());
if (kernel->requiresSshForBuffers()) {
auto surfaceState = ptrOffset(reinterpret_cast<uintptr_t *>(kernel->getSurfaceStateHeap()),
auto surfaceState = ptrOffset(reinterpret_cast<uintptr_t *>(kernel->getSurfaceStateHeap(rootDeviceIndex)),
kernel->getKernelInfo().patchInfo.pAllocateStatelessPrintfSurface->SurfaceStateHeapOffset);
void *addressToPatch = printfSurface->getUnderlyingBuffer();
size_t sizeToPatch = printfSurface->getUnderlyingBufferSize();

View File

@ -81,7 +81,7 @@ HWTEST_F(MediaImageSetArgTest, WhenSettingMediaImageArgThenArgsSetCorrectly) {
typedef typename FamilyType::MEDIA_SURFACE_STATE MEDIA_SURFACE_STATE;
auto pSurfaceState = reinterpret_cast<const MEDIA_SURFACE_STATE *>(
ptrOffset(pKernel->getSurfaceStateHeap(),
ptrOffset(pKernel->getSurfaceStateHeap(rootDeviceIndex),
pKernelInfo->kernelArgInfo[0].offsetHeap));
srcImage->setMediaImageArg(const_cast<MEDIA_SURFACE_STATE *>(pSurfaceState), pClDevice->getRootDeviceIndex());
@ -109,7 +109,7 @@ HWTEST_F(MediaImageSetArgTest, WhenSettingKernelArgImageThenArgsSetCorrectly) {
ASSERT_EQ(CL_SUCCESS, retVal);
auto pSurfaceState = reinterpret_cast<const MEDIA_SURFACE_STATE *>(
ptrOffset(pKernel->getSurfaceStateHeap(),
ptrOffset(pKernel->getSurfaceStateHeap(rootDeviceIndex),
pKernelInfo->kernelArgInfo[0].offsetHeap));
uint64_t surfaceAddress = pSurfaceState->getSurfaceBaseAddress();

View File

@ -27,8 +27,9 @@ namespace clMemLocallyUncachedResourceTests {
template <typename FamilyType>
uint32_t argMocs(Kernel &kernel, size_t argIndex) {
auto rootDeviceIndex = kernel.getDevices()[0]->getRootDeviceIndex();
using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE;
auto surfaceStateHeapAddress = kernel.getSurfaceStateHeap();
auto surfaceStateHeapAddress = kernel.getSurfaceStateHeap(rootDeviceIndex);
auto surfaceStateHeapAddressOffset = kernel.getKernelInfo().kernelArgInfo[argIndex].offsetHeap;
auto surfaceState = reinterpret_cast<RENDER_SURFACE_STATE *>(ptrOffset(surfaceStateHeapAddress, surfaceStateHeapAddressOffset));
return surfaceState->getMemoryObjectControlState();

View File

@ -484,12 +484,12 @@ HWCMDTEST_F(IGFX_GEN8_CORE, BuiltInTests, givenAuxTranslationKernelWhenSettingKe
// read args
auto argNum = 0;
auto expectedMocs = pDevice->getGmmHelper()->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER_CACHELINE_MISALIGNED);
auto sshBase = mockAuxBuiltInOp.convertToAuxKernel[0]->getSurfaceStateHeap();
auto sshBase = mockAuxBuiltInOp.convertToAuxKernel[0]->getSurfaceStateHeap(rootDeviceIndex);
auto sshOffset = mockAuxBuiltInOp.convertToAuxKernel[0]->getKernelInfo().kernelArgInfo[argNum].offsetHeap;
auto surfaceState = reinterpret_cast<RENDER_SURFACE_STATE *>(ptrOffset(sshBase, sshOffset));
EXPECT_EQ(expectedMocs, surfaceState->getMemoryObjectControlState());
sshBase = mockAuxBuiltInOp.convertToNonAuxKernel[0]->getSurfaceStateHeap();
sshBase = mockAuxBuiltInOp.convertToNonAuxKernel[0]->getSurfaceStateHeap(rootDeviceIndex);
sshOffset = mockAuxBuiltInOp.convertToNonAuxKernel[0]->getKernelInfo().kernelArgInfo[argNum].offsetHeap;
surfaceState = reinterpret_cast<RENDER_SURFACE_STATE *>(ptrOffset(sshBase, sshOffset));
EXPECT_EQ(expectedMocs, surfaceState->getMemoryObjectControlState());
@ -499,12 +499,12 @@ HWCMDTEST_F(IGFX_GEN8_CORE, BuiltInTests, givenAuxTranslationKernelWhenSettingKe
// write args
auto argNum = 1;
auto expectedMocs = pDevice->getGmmHelper()->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER);
auto sshBase = mockAuxBuiltInOp.convertToAuxKernel[0]->getSurfaceStateHeap();
auto sshBase = mockAuxBuiltInOp.convertToAuxKernel[0]->getSurfaceStateHeap(rootDeviceIndex);
auto sshOffset = mockAuxBuiltInOp.convertToAuxKernel[0]->getKernelInfo().kernelArgInfo[argNum].offsetHeap;
auto surfaceState = reinterpret_cast<RENDER_SURFACE_STATE *>(ptrOffset(sshBase, sshOffset));
EXPECT_EQ(expectedMocs, surfaceState->getMemoryObjectControlState());
sshBase = mockAuxBuiltInOp.convertToNonAuxKernel[0]->getSurfaceStateHeap();
sshBase = mockAuxBuiltInOp.convertToNonAuxKernel[0]->getSurfaceStateHeap(rootDeviceIndex);
sshOffset = mockAuxBuiltInOp.convertToNonAuxKernel[0]->getKernelInfo().kernelArgInfo[argNum].offsetHeap;
surfaceState = reinterpret_cast<RENDER_SURFACE_STATE *>(ptrOffset(sshBase, sshOffset));
EXPECT_EQ(expectedMocs, surfaceState->getMemoryObjectControlState());
@ -541,7 +541,7 @@ HWTEST_F(BuiltInTests, givenAuxToNonAuxTranslationWhenSettingSurfaceStateThenSet
{
// read arg
auto argNum = 0;
auto sshBase = mockAuxBuiltInOp.convertToNonAuxKernel[0]->getSurfaceStateHeap();
auto sshBase = mockAuxBuiltInOp.convertToNonAuxKernel[0]->getSurfaceStateHeap(rootDeviceIndex);
auto sshOffset = mockAuxBuiltInOp.convertToNonAuxKernel[0]->getKernelInfo().kernelArgInfo[argNum].offsetHeap;
auto surfaceState = reinterpret_cast<RENDER_SURFACE_STATE *>(ptrOffset(sshBase, sshOffset));
EXPECT_EQ(AUXILIARY_SURFACE_MODE::AUXILIARY_SURFACE_MODE_AUX_CCS_E, surfaceState->getAuxiliarySurfaceMode());
@ -550,7 +550,7 @@ HWTEST_F(BuiltInTests, givenAuxToNonAuxTranslationWhenSettingSurfaceStateThenSet
{
// write arg
auto argNum = 1;
auto sshBase = mockAuxBuiltInOp.convertToNonAuxKernel[0]->getSurfaceStateHeap();
auto sshBase = mockAuxBuiltInOp.convertToNonAuxKernel[0]->getSurfaceStateHeap(rootDeviceIndex);
auto sshOffset = mockAuxBuiltInOp.convertToNonAuxKernel[0]->getKernelInfo().kernelArgInfo[argNum].offsetHeap;
auto surfaceState = reinterpret_cast<RENDER_SURFACE_STATE *>(ptrOffset(sshBase, sshOffset));
EXPECT_EQ(AUXILIARY_SURFACE_MODE::AUXILIARY_SURFACE_MODE_AUX_NONE, surfaceState->getAuxiliarySurfaceMode());
@ -586,7 +586,7 @@ HWTEST_F(BuiltInTests, givenNonAuxToAuxTranslationWhenSettingSurfaceStateThenSet
{
// read arg
auto argNum = 0;
auto sshBase = mockAuxBuiltInOp.convertToAuxKernel[0]->getSurfaceStateHeap();
auto sshBase = mockAuxBuiltInOp.convertToAuxKernel[0]->getSurfaceStateHeap(rootDeviceIndex);
auto sshOffset = mockAuxBuiltInOp.convertToAuxKernel[0]->getKernelInfo().kernelArgInfo[argNum].offsetHeap;
auto surfaceState = reinterpret_cast<RENDER_SURFACE_STATE *>(ptrOffset(sshBase, sshOffset));
EXPECT_EQ(AUXILIARY_SURFACE_MODE::AUXILIARY_SURFACE_MODE_AUX_NONE, surfaceState->getAuxiliarySurfaceMode());
@ -595,7 +595,7 @@ HWTEST_F(BuiltInTests, givenNonAuxToAuxTranslationWhenSettingSurfaceStateThenSet
{
// write arg
auto argNum = 1;
auto sshBase = mockAuxBuiltInOp.convertToAuxKernel[0]->getSurfaceStateHeap();
auto sshBase = mockAuxBuiltInOp.convertToAuxKernel[0]->getSurfaceStateHeap(rootDeviceIndex);
auto sshOffset = mockAuxBuiltInOp.convertToAuxKernel[0]->getKernelInfo().kernelArgInfo[argNum].offsetHeap;
auto surfaceState = reinterpret_cast<RENDER_SURFACE_STATE *>(ptrOffset(sshBase, sshOffset));
EXPECT_EQ(AUXILIARY_SURFACE_MODE::AUXILIARY_SURFACE_MODE_AUX_CCS_E, surfaceState->getAuxiliarySurfaceMode());

View File

@ -1038,7 +1038,7 @@ HWTEST_F(CommandQueueCommandStreamTest, givenDebugKernelWhenSetupDebugSurfaceIsC
std::unique_ptr<MockDebugKernel> kernel(MockKernel::create<MockDebugKernel>(*pDevice, &program));
MockCommandQueue cmdQ(context.get(), pClDevice, 0);
kernel->setSshLocal(nullptr, sizeof(RENDER_SURFACE_STATE) + kernel->getAllocatedKernelInfo()->patchInfo.pAllocateSystemThreadSurface->Offset);
kernel->setSshLocal(nullptr, sizeof(RENDER_SURFACE_STATE) + kernel->getAllocatedKernelInfo()->patchInfo.pAllocateSystemThreadSurface->Offset, rootDeviceIndex);
kernel->getAllocatedKernelInfo()->usesSsh = true;
auto &commandStreamReceiver = cmdQ.getGpgpuCommandStreamReceiver();
@ -1047,7 +1047,7 @@ HWTEST_F(CommandQueueCommandStreamTest, givenDebugKernelWhenSetupDebugSurfaceIsC
auto debugSurface = commandStreamReceiver.getDebugSurfaceAllocation();
ASSERT_NE(nullptr, debugSurface);
RENDER_SURFACE_STATE *surfaceState = (RENDER_SURFACE_STATE *)kernel->getSurfaceStateHeap();
RENDER_SURFACE_STATE *surfaceState = (RENDER_SURFACE_STATE *)kernel->getSurfaceStateHeap(rootDeviceIndex);
EXPECT_EQ(debugSurface->getGpuAddress(), surfaceState->getSurfaceBaseAddress());
}
@ -1058,7 +1058,7 @@ HWTEST_F(CommandQueueCommandStreamTest, givenCsrWithDebugSurfaceAllocatedWhenSet
std::unique_ptr<MockDebugKernel> kernel(MockKernel::create<MockDebugKernel>(*pDevice, &program));
MockCommandQueue cmdQ(context.get(), pClDevice, 0);
kernel->setSshLocal(nullptr, sizeof(RENDER_SURFACE_STATE) + kernel->getAllocatedKernelInfo()->patchInfo.pAllocateSystemThreadSurface->Offset);
kernel->setSshLocal(nullptr, sizeof(RENDER_SURFACE_STATE) + kernel->getAllocatedKernelInfo()->patchInfo.pAllocateSystemThreadSurface->Offset, rootDeviceIndex);
kernel->getAllocatedKernelInfo()->usesSsh = true;
auto &commandStreamReceiver = cmdQ.getGpgpuCommandStreamReceiver();
commandStreamReceiver.allocateDebugSurface(SipKernel::maxDbgSurfaceSize);
@ -1068,7 +1068,7 @@ HWTEST_F(CommandQueueCommandStreamTest, givenCsrWithDebugSurfaceAllocatedWhenSet
cmdQ.setupDebugSurface(kernel.get());
EXPECT_EQ(debugSurface, commandStreamReceiver.getDebugSurfaceAllocation());
RENDER_SURFACE_STATE *surfaceState = (RENDER_SURFACE_STATE *)kernel->getSurfaceStateHeap();
RENDER_SURFACE_STATE *surfaceState = (RENDER_SURFACE_STATE *)kernel->getSurfaceStateHeap(rootDeviceIndex);
EXPECT_EQ(debugSurface->getGpuAddress(), surfaceState->getSurfaceBaseAddress());
}

View File

@ -733,7 +733,7 @@ HWTEST_F(DispatchWalkerTest, GivenBlockedQueueWhenDispatchingWalkerThenRequiredH
auto expectedSizeDSH = HardwareCommandsHelper<FamilyType>::getSizeRequiredDSH(kernel);
auto expectedSizeIOH = HardwareCommandsHelper<FamilyType>::getSizeRequiredIOH(rootDeviceIndex, kernel, Math::computeTotalElementsCount(localWorkgroupSize));
auto expectedSizeSSH = HardwareCommandsHelper<FamilyType>::getSizeRequiredSSH(kernel);
auto expectedSizeSSH = HardwareCommandsHelper<FamilyType>::getSizeRequiredSSH(kernel, rootDeviceIndex);
EXPECT_LE(expectedSizeDSH, blockedCommandsData->dsh->getMaxAvailableSpace());
EXPECT_LE(expectedSizeIOH, blockedCommandsData->ioh->getMaxAvailableSpace());

View File

@ -578,7 +578,7 @@ HWTEST_F(EnqueueHandlerTest, givenKernelUsingSyncBufferWhenEnqueuingKernelThenSs
kernel->initialize();
auto bindingTableState = reinterpret_cast<BINDING_TABLE_STATE *>(
ptrOffset(kernel->getSurfaceStateHeap(), sPatchBindingTableState.Offset));
ptrOffset(kernel->getSurfaceStateHeap(rootDeviceIndex), sPatchBindingTableState.Offset));
bindingTableState->setSurfaceStatePointer(0);
auto mockCmdQ = clUniquePtr(new MockCommandQueueHw<FamilyType>(context, pClDevice, 0));

View File

@ -400,7 +400,7 @@ HWTEST_F(GetSizeRequiredBufferTest, givenMultipleKernelRequiringSshWhenTotalSize
builder.buildDispatchInfos(multiDispatchInfo);
builder.buildDispatchInfos(multiDispatchInfo);
auto sizeSSH = multiDispatchInfo.begin()->getKernel()->getSurfaceStateHeapSize();
auto sizeSSH = multiDispatchInfo.begin()->getKernel()->getSurfaceStateHeapSize(rootDeviceIndex);
sizeSSH += sizeSSH ? FamilyType::BINDING_TABLE_STATE::SURFACESTATEPOINTER_ALIGN_SIZE : 0;
sizeSSH = alignUp(sizeSSH, MemoryConstants::cacheLineSize);
@ -439,7 +439,7 @@ HWTEST_F(GetSizeRequiredBufferTest, GivenHelloWorldKernelWhenEnqueingKernelThenH
auto expectedSizeCS = EnqueueOperation<FamilyType>::getSizeRequiredCS(CL_COMMAND_NDRANGE_KERNEL, false, false, *pCmdQ, KernelFixture::pKernel);
auto expectedSizeDSH = HardwareCommandsHelper<FamilyType>::getSizeRequiredDSH(*KernelFixture::pKernel);
auto expectedSizeIOH = HardwareCommandsHelper<FamilyType>::getSizeRequiredIOH(rootDeviceIndex, *KernelFixture::pKernel, workSize[0]);
auto expectedSizeSSH = HardwareCommandsHelper<FamilyType>::getSizeRequiredSSH(*KernelFixture::pKernel);
auto expectedSizeSSH = HardwareCommandsHelper<FamilyType>::getSizeRequiredSSH(*KernelFixture::pKernel, rootDeviceIndex);
// Since each enqueue* may flush, we may see a MI_BATCH_BUFFER_END appended.
expectedSizeCS += sizeof(typename FamilyType::MI_BATCH_BUFFER_END);
@ -478,7 +478,7 @@ HWTEST_F(GetSizeRequiredBufferTest, GivenKernelWithSimpleArgWhenEnqueingKernelTh
auto expectedSizeCS = EnqueueOperation<FamilyType>::getSizeRequiredCS(CL_COMMAND_NDRANGE_KERNEL, false, false, *pCmdQ, KernelFixture::pKernel);
auto expectedSizeDSH = HardwareCommandsHelper<FamilyType>::getSizeRequiredDSH(*KernelFixture::pKernel);
auto expectedSizeIOH = HardwareCommandsHelper<FamilyType>::getSizeRequiredIOH(rootDeviceIndex, *KernelFixture::pKernel, workSize[0]);
auto expectedSizeSSH = HardwareCommandsHelper<FamilyType>::getSizeRequiredSSH(*KernelFixture::pKernel);
auto expectedSizeSSH = HardwareCommandsHelper<FamilyType>::getSizeRequiredSSH(*KernelFixture::pKernel, rootDeviceIndex);
EXPECT_EQ(0u, expectedSizeIOH % GPGPU_WALKER::INDIRECTDATASTARTADDRESS_ALIGN_SIZE);
EXPECT_EQ(0u, expectedSizeDSH % 64);

View File

@ -93,7 +93,7 @@ HWTEST_F(GetSizeRequiredImageTest, WhenCopyingImageThenHeapsAndCommandBufferCons
auto expectedSizeCS = EnqueueOperation<FamilyType>::getSizeRequiredCS(CL_COMMAND_COPY_IMAGE, false, false, *pCmdQ, kernel);
auto expectedSizeDSH = HardwareCommandsHelper<FamilyType>::getSizeRequiredDSH(*kernel);
auto expectedSizeIOH = HardwareCommandsHelper<FamilyType>::getSizeRequiredIOH(rootDeviceIndex, *kernel);
auto expectedSizeSSH = HardwareCommandsHelper<FamilyType>::getSizeRequiredSSH(*kernel);
auto expectedSizeSSH = HardwareCommandsHelper<FamilyType>::getSizeRequiredSSH(*kernel, rootDeviceIndex);
// Since each enqueue* may flush, we may see a MI_BATCH_BUFFER_END appended.
expectedSizeCS += sizeof(typename FamilyType::MI_BATCH_BUFFER_END);
@ -140,7 +140,7 @@ HWTEST_F(GetSizeRequiredImageTest, WhenCopyingReadWriteImageThenHeapsAndCommandB
auto expectedSizeCS = EnqueueOperation<FamilyType>::getSizeRequiredCS(CL_COMMAND_COPY_IMAGE, false, false, *pCmdQ, kernel.get());
auto expectedSizeDSH = HardwareCommandsHelper<FamilyType>::getSizeRequiredDSH(*kernel.get());
auto expectedSizeIOH = HardwareCommandsHelper<FamilyType>::getSizeRequiredIOH(rootDeviceIndex, *kernel.get());
auto expectedSizeSSH = HardwareCommandsHelper<FamilyType>::getSizeRequiredSSH(*kernel.get());
auto expectedSizeSSH = HardwareCommandsHelper<FamilyType>::getSizeRequiredSSH(*kernel.get(), rootDeviceIndex);
// Since each enqueue* may flush, we may see a MI_BATCH_BUFFER_END appended.
expectedSizeCS += sizeof(typename FamilyType::MI_BATCH_BUFFER_END);
@ -197,7 +197,7 @@ HWTEST_F(GetSizeRequiredImageTest, WhenReadingImageNonBlockingThenHeapsAndComman
auto expectedSizeCS = EnqueueOperation<FamilyType>::getSizeRequiredCS(CL_COMMAND_READ_IMAGE, false, false, *pCmdQ, kernel);
auto expectedSizeDSH = HardwareCommandsHelper<FamilyType>::getSizeRequiredDSH(*kernel);
auto expectedSizeIOH = HardwareCommandsHelper<FamilyType>::getSizeRequiredIOH(rootDeviceIndex, *kernel);
auto expectedSizeSSH = HardwareCommandsHelper<FamilyType>::getSizeRequiredSSH(*kernel);
auto expectedSizeSSH = HardwareCommandsHelper<FamilyType>::getSizeRequiredSSH(*kernel, rootDeviceIndex);
// Since each enqueue* may flush, we may see a MI_BATCH_BUFFER_END appended.
expectedSizeCS += sizeof(typename FamilyType::MI_BATCH_BUFFER_END);
@ -252,7 +252,7 @@ HWTEST_F(GetSizeRequiredImageTest, WhenReadingImageBlockingThenHeapsAndCommandBu
auto expectedSizeCS = EnqueueOperation<FamilyType>::getSizeRequiredCS(CL_COMMAND_READ_IMAGE, false, false, *pCmdQ, kernel);
auto expectedSizeDSH = HardwareCommandsHelper<FamilyType>::getSizeRequiredDSH(*kernel);
auto expectedSizeIOH = HardwareCommandsHelper<FamilyType>::getSizeRequiredIOH(rootDeviceIndex, *kernel);
auto expectedSizeSSH = HardwareCommandsHelper<FamilyType>::getSizeRequiredSSH(*kernel);
auto expectedSizeSSH = HardwareCommandsHelper<FamilyType>::getSizeRequiredSSH(*kernel, rootDeviceIndex);
// Since each enqueue* may flush, we may see a MI_BATCH_BUFFER_END appended.
expectedSizeCS += sizeof(typename FamilyType::MI_BATCH_BUFFER_END);
@ -307,7 +307,7 @@ HWTEST_F(GetSizeRequiredImageTest, WhenWritingImageNonBlockingThenHeapsAndComman
auto expectedSizeCS = EnqueueOperation<FamilyType>::getSizeRequiredCS(CL_COMMAND_WRITE_IMAGE, false, false, *pCmdQ, kernel);
auto expectedSizeDSH = HardwareCommandsHelper<FamilyType>::getSizeRequiredDSH(*kernel);
auto expectedSizeIOH = HardwareCommandsHelper<FamilyType>::getSizeRequiredIOH(rootDeviceIndex, *kernel);
auto expectedSizeSSH = HardwareCommandsHelper<FamilyType>::getSizeRequiredSSH(*kernel);
auto expectedSizeSSH = HardwareCommandsHelper<FamilyType>::getSizeRequiredSSH(*kernel, rootDeviceIndex);
// Since each enqueue* may flush, we may see a MI_BATCH_BUFFER_END appended.
expectedSizeCS += sizeof(typename FamilyType::MI_BATCH_BUFFER_END);
@ -362,7 +362,7 @@ HWTEST_F(GetSizeRequiredImageTest, WhenWritingImageBlockingThenHeapsAndCommandBu
auto expectedSizeCS = EnqueueOperation<FamilyType>::getSizeRequiredCS(CL_COMMAND_WRITE_IMAGE, false, false, *pCmdQ, kernel);
auto expectedSizeDSH = HardwareCommandsHelper<FamilyType>::getSizeRequiredDSH(*kernel);
auto expectedSizeIOH = HardwareCommandsHelper<FamilyType>::getSizeRequiredIOH(rootDeviceIndex, *kernel);
auto expectedSizeSSH = HardwareCommandsHelper<FamilyType>::getSizeRequiredSSH(*kernel);
auto expectedSizeSSH = HardwareCommandsHelper<FamilyType>::getSizeRequiredSSH(*kernel, rootDeviceIndex);
// Since each enqueue* may flush, we may see a MI_BATCH_BUFFER_END appended.
expectedSizeCS += sizeof(typename FamilyType::MI_BATCH_BUFFER_END);

View File

@ -180,7 +180,7 @@ HWTEST_TEMPLATED_F(SyncBufferHandlerTest, GivenSshRequiredWhenPatchingSyncBuffer
pClDevice->allocateSyncBufferHandler();
auto syncBufferHandler = getSyncBufferHandler();
auto surfaceState = reinterpret_cast<RENDER_SURFACE_STATE *>(ptrOffset(kernel->getSurfaceStateHeap(),
auto surfaceState = reinterpret_cast<RENDER_SURFACE_STATE *>(ptrOffset(kernel->getSurfaceStateHeap(rootDeviceIndex),
sPatchAllocateSyncBuffer.SurfaceStateHeapOffset));
auto bufferAddress = syncBufferHandler->graphicsAllocation->getGpuAddress();
surfaceState->setSurfaceBaseAddress(bufferAddress + 1);

View File

@ -537,7 +537,7 @@ HWCMDTEST_P(IGFX_GEN8_CORE, DeviceQueueHwWithKernel, WhenSetiingIUpIndirectState
auto dsh = devQueueHw->getIndirectHeap(IndirectHeap::DYNAMIC_STATE);
ASSERT_NE(nullptr, dsh);
size_t surfaceStateHeapSize = HardwareCommandsHelper<FamilyType>::getSshSizeForExecutionModel(const_cast<const Kernel &>(*pKernel));
size_t surfaceStateHeapSize = HardwareCommandsHelper<FamilyType>::getSshSizeForExecutionModel(const_cast<const Kernel &>(*pKernel), rootDeviceIndex);
auto ssh = new IndirectHeap(alignedMalloc(surfaceStateHeapSize, MemoryConstants::pageSize), surfaceStateHeapSize);
auto usedBeforeSSH = ssh->getUsed();
@ -565,7 +565,7 @@ HWCMDTEST_P(IGFX_GEN8_CORE, DeviceQueueHwWithKernel, WhenSettingUpIndirectStateT
auto dsh = devQueueHw->getIndirectHeap(IndirectHeap::DYNAMIC_STATE);
ASSERT_NE(nullptr, dsh);
size_t surfaceStateHeapSize = HardwareCommandsHelper<FamilyType>::getSshSizeForExecutionModel(const_cast<const Kernel &>(*pKernel));
size_t surfaceStateHeapSize = HardwareCommandsHelper<FamilyType>::getSshSizeForExecutionModel(const_cast<const Kernel &>(*pKernel), rootDeviceIndex);
auto ssh = new IndirectHeap(alignedMalloc(surfaceStateHeapSize, MemoryConstants::pageSize), surfaceStateHeapSize);
@ -593,7 +593,7 @@ HWCMDTEST_P(IGFX_GEN8_CORE, DeviceQueueHwWithKernel, WhenSettingUpIndirectStateT
auto dsh = devQueueHw->getIndirectHeap(IndirectHeap::DYNAMIC_STATE);
ASSERT_NE(nullptr, dsh);
size_t surfaceStateHeapSize = HardwareCommandsHelper<FamilyType>::getSshSizeForExecutionModel(const_cast<const Kernel &>(*pKernel));
size_t surfaceStateHeapSize = HardwareCommandsHelper<FamilyType>::getSshSizeForExecutionModel(const_cast<const Kernel &>(*pKernel), rootDeviceIndex);
auto ssh = new IndirectHeap(alignedMalloc(surfaceStateHeapSize, MemoryConstants::pageSize), surfaceStateHeapSize);
@ -631,7 +631,7 @@ HWCMDTEST_P(IGFX_GEN8_CORE, DeviceQueueHwWithKernel, GivenHasBarriersSetWhenCall
}
auto surfaceStateHeapSize =
HardwareCommandsHelper<FamilyType>::getSshSizeForExecutionModel(const_cast<const Kernel &>(*pKernel));
HardwareCommandsHelper<FamilyType>::getSshSizeForExecutionModel(const_cast<const Kernel &>(*pKernel), rootDeviceIndex);
auto ssh = std::make_unique<IndirectHeap>(alignedMalloc(surfaceStateHeapSize, MemoryConstants::pageSize), surfaceStateHeapSize);
devQueueHw->setupIndirectState(*ssh, *dsh, pKernel, parentCount, false);

View File

@ -297,7 +297,7 @@ HWCMDTEST_P(IGFX_GEN8_CORE, ParentKernelEnqueueTest, givenParentKernelWhenEnqueu
BlockKernelManager *blockManager = pProgram->getBlockKernelManager();
uint32_t blockCount = static_cast<uint32_t>(blockManager->getCount());
size_t parentKernelSSHSize = pKernel->getSurfaceStateHeapSize();
size_t parentKernelSSHSize = pKernel->getSurfaceStateHeapSize(rootDeviceIndex);
MockMultiDispatchInfo multiDispatchInfo(pClDevice, pKernel);
@ -340,7 +340,7 @@ HWCMDTEST_P(IGFX_GEN8_CORE, ParentKernelEnqueueTest, givenParentKernelWhenEnqueu
EXPECT_EQ(0, memcmp(srcSurfaceState, dstSurfaceState, sizeof(RENDER_SURFACE_STATE)));
}
blockSSH = ptrOffset(blockSSH, blockKernel->getSurfaceStateHeapSize());
blockSSH = ptrOffset(blockSSH, blockKernel->getSurfaceStateHeapSize(rootDeviceIndex));
}
delete blockKernel;

View File

@ -132,7 +132,7 @@ HWCMDTEST_P(IGFX_GEN8_CORE, ParentKernelDispatchTest, givenParentKernelWhenQueue
EXPECT_LE(pKernel->getKernelInfo().heapInfo.SurfaceStateHeapSize, ssh.getMaxAvailableSpace());
size_t minRequiredSize = HardwareCommandsHelper<FamilyType>::getTotalSizeRequiredSSH(multiDispatchInfo);
size_t minRequiredSizeForEM = HardwareCommandsHelper<FamilyType>::getSshSizeForExecutionModel(*pKernel);
size_t minRequiredSizeForEM = HardwareCommandsHelper<FamilyType>::getSshSizeForExecutionModel(*pKernel, rootDeviceIndex);
EXPECT_LE(minRequiredSize + minRequiredSizeForEM, ssh.getMaxAvailableSpace());
}
@ -162,7 +162,7 @@ HWCMDTEST_P(IGFX_GEN8_CORE, ParentKernelDispatchTest, givenParentKernelWhenQueue
ASSERT_NE(nullptr, blockedCommandsData);
size_t minRequiredSize = HardwareCommandsHelper<FamilyType>::getTotalSizeRequiredSSH(multiDispatchInfo) + UnitTestHelper<FamilyType>::getDefaultSshUsage();
size_t minRequiredSizeForEM = HardwareCommandsHelper<FamilyType>::getSshSizeForExecutionModel(*pKernel);
size_t minRequiredSizeForEM = HardwareCommandsHelper<FamilyType>::getSshSizeForExecutionModel(*pKernel, rootDeviceIndex);
size_t sshUsed = blockedCommandsData->ssh->getUsed();

View File

@ -53,7 +53,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, ExecutionModelSchedulerFixture, WhenDispatchingSched
EXPECT_NE(nullptr, executionModelDsh);
size_t minRequiredSizeForSchedulerSSH = HardwareCommandsHelper<FamilyType>::getSshSizeForExecutionModel(*parentKernel);
size_t minRequiredSizeForSchedulerSSH = HardwareCommandsHelper<FamilyType>::getSshSizeForExecutionModel(*parentKernel, rootDeviceIndex);
// Setup heaps in pCmdQ
MultiDispatchInfo multiDispatchinfo(&scheduler);
LinearStream &commandStream = getCommandStream<FamilyType, CL_COMMAND_NDRANGE_KERNEL>(*pCmdQ, CsrDependencies(),
@ -174,7 +174,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, ExecutionModelSchedulerFixture, WhenDispatchingSched
DeviceQueueHw<FamilyType> *pDevQueueHw = castToObject<DeviceQueueHw<FamilyType>>(pDevQueue);
SchedulerKernel &scheduler = context->getSchedulerKernel();
size_t minRequiredSizeForSchedulerSSH = HardwareCommandsHelper<FamilyType>::getSshSizeForExecutionModel(*parentKernel);
size_t minRequiredSizeForSchedulerSSH = HardwareCommandsHelper<FamilyType>::getSshSizeForExecutionModel(*parentKernel, rootDeviceIndex);
// Setup heaps in pCmdQ
MultiDispatchInfo multiDispatchinfo(&scheduler);
@ -209,7 +209,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, ParentKernelCommandQueueFixture, GivenEarlyReturnSet
SchedulerKernel &scheduler = context->getSchedulerKernel();
size_t minRequiredSizeForSchedulerSSH = HardwareCommandsHelper<FamilyType>::getSizeRequiredSSH(scheduler);
size_t minRequiredSizeForSchedulerSSH = HardwareCommandsHelper<FamilyType>::getSizeRequiredSSH(scheduler, rootDeviceIndex);
// Setup heaps in pCmdQ
MultiDispatchInfo multiDispatchinfo(&scheduler);
LinearStream &commandStream = getCommandStream<FamilyType, CL_COMMAND_NDRANGE_KERNEL>(*pCmdQ, CsrDependencies(),

View File

@ -97,7 +97,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, ParentKernelCommandQueueFixture, givenLockedEMcritca
dsh->getSpace(mockDevQueue.getDshOffset());
size_t minSizeSSHForEM = HardwareCommandsHelper<FamilyType>::getSshSizeForExecutionModel(*parentKernel);
size_t minSizeSSHForEM = HardwareCommandsHelper<FamilyType>::getSshSizeForExecutionModel(*parentKernel, rootDeviceIndex);
auto cmdStreamAllocation = device->getMemoryManager()->allocateGraphicsMemoryWithProperties({device->getRootDeviceIndex(), 4096, GraphicsAllocation::AllocationType::COMMAND_BUFFER, device->getDeviceBitfield()});
auto blockedCommandData = std::make_unique<KernelOperation>(new LinearStream(cmdStreamAllocation),
@ -162,7 +162,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, ParentKernelCommandQueueFixture, givenParentKernelWh
*pCmdQ->getGpgpuCommandStreamReceiver().getInternalAllocationStorage());
blockedCommandData->setHeaps(dsh, ioh, ssh);
size_t minSizeSSHForEM = HardwareCommandsHelper<FamilyType>::getSshSizeForExecutionModel(*parentKernel);
size_t minSizeSSHForEM = HardwareCommandsHelper<FamilyType>::getSshSizeForExecutionModel(*parentKernel, rootDeviceIndex);
blockedCommandData->surfaceStateHeapSizeEM = minSizeSSHForEM;
PreemptionMode preemptionMode = device->getPreemptionMode();
@ -203,7 +203,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, ParentKernelCommandQueueFixture, givenParentKernelWh
*pCmdQ->getGpgpuCommandStreamReceiver().getInternalAllocationStorage());
blockedCommandData->setHeaps(dsh, ioh, ssh);
size_t minSizeSSHForEM = HardwareCommandsHelper<FamilyType>::getSshSizeForExecutionModel(*parentKernel);
size_t minSizeSSHForEM = HardwareCommandsHelper<FamilyType>::getSshSizeForExecutionModel(*parentKernel, rootDeviceIndex);
blockedCommandData->surfaceStateHeapSizeEM = minSizeSSHForEM;
PreemptionMode preemptionMode = device->getPreemptionMode();
@ -241,7 +241,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, ParentKernelCommandQueueFixture, givenBlockedParentK
*pCmdQ->getGpgpuCommandStreamReceiver().getInternalAllocationStorage());
blockedCommandData->setHeaps(dsh, ioh, ssh);
size_t minSizeSSHForEM = HardwareCommandsHelper<FamilyType>::getSshSizeForExecutionModel(*parentKernel);
size_t minSizeSSHForEM = HardwareCommandsHelper<FamilyType>::getSshSizeForExecutionModel(*parentKernel, rootDeviceIndex);
blockedCommandData->surfaceStateHeapSizeEM = minSizeSSHForEM;
PreemptionMode preemptionMode = device->getPreemptionMode();
@ -282,7 +282,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, ParentKernelCommandQueueFixture, givenParentKernelWh
*pCmdQ->getGpgpuCommandStreamReceiver().getInternalAllocationStorage());
blockedCommandData->setHeaps(dsh, ioh, ssh);
size_t minSizeSSHForEM = HardwareCommandsHelper<FamilyType>::getSshSizeForExecutionModel(*parentKernel);
size_t minSizeSSHForEM = HardwareCommandsHelper<FamilyType>::getSshSizeForExecutionModel(*parentKernel, rootDeviceIndex);
blockedCommandData->surfaceStateHeapSizeEM = minSizeSSHForEM;
PreemptionMode preemptionMode = device->getPreemptionMode();
@ -308,7 +308,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, ParentKernelCommandQueueFixture, givenUsedCommandQue
MockCommandQueue cmdQ(context, device, properties);
size_t minSizeSSHForEM = HardwareCommandsHelper<FamilyType>::getSshSizeForExecutionModel(*parentKernel);
size_t minSizeSSHForEM = HardwareCommandsHelper<FamilyType>::getSshSizeForExecutionModel(*parentKernel, rootDeviceIndex);
size_t heapSize = 20;
@ -362,7 +362,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, ParentKernelCommandQueueFixture, givenNotUsedSSHWhen
parentKernel->createReflectionSurface();
context->setDefaultDeviceQueue(&mockDevQueue);
size_t minSizeSSHForEM = HardwareCommandsHelper<FamilyType>::getSshSizeForExecutionModel(*parentKernel);
size_t minSizeSSHForEM = HardwareCommandsHelper<FamilyType>::getSshSizeForExecutionModel(*parentKernel, rootDeviceIndex);
size_t heapSize = 20;

View File

@ -109,7 +109,7 @@ struct ParentKernelCommandQueueFixture : public CommandQueueHwFixture,
testing::Test {
void SetUp() override {
device = new MockClDevice{MockDevice::createWithNewExecutionEnvironment<MockDevice>(nullptr)};
device = new MockClDevice{MockDevice::createWithNewExecutionEnvironment<MockDevice>(nullptr, rootDeviceIndex)};
CommandQueueHwFixture::SetUp(device, 0);
}
void TearDown() override {
@ -125,4 +125,5 @@ struct ParentKernelCommandQueueFixture : public CommandQueueHwFixture,
return std::make_unique<KernelOperation>(commandStream, *gpgpuCsr.getInternalAllocationStorage());
}
const uint32_t rootDeviceIndex = 0u;
};

View File

@ -31,7 +31,7 @@ BDWTEST_F(BdwSchedulerTest, givenCallToDispatchSchedulerWhenPipeControlWithCSSta
DeviceQueueHw<FamilyType> *pDevQueueHw = castToObject<DeviceQueueHw<FamilyType>>(pDevQueue);
SchedulerKernel &scheduler = context->getSchedulerKernel();
size_t minRequiredSizeForSchedulerSSH = HardwareCommandsHelper<FamilyType>::getSshSizeForExecutionModel(*parentKernel);
size_t minRequiredSizeForSchedulerSSH = HardwareCommandsHelper<FamilyType>::getSshSizeForExecutionModel(*parentKernel, rootDeviceIndex);
// Setup heaps in pCmdQ
MultiDispatchInfo multiDispatchinfo(&scheduler);

View File

@ -160,7 +160,8 @@ class GTPinFixture : public ContextFixture, public MemoryManagementFixture {
executionEnvironment->memoryManager.reset(memoryManager);
initPlatform();
pDevice = pPlatform->getClDevice(0);
cl_device_id device = (cl_device_id)pDevice;
rootDeviceIndex = pDevice->getRootDeviceIndex();
cl_device_id device = pDevice;
ContextFixture::SetUp(1, &device);
driverServices.bufferAllocate = nullptr;
@ -193,6 +194,7 @@ class GTPinFixture : public ContextFixture, public MemoryManagementFixture {
driver_services_t driverServices;
gtpin::ocl::gtpin_events_t gtpinCallbacks;
MockMemoryManagerWithFailures *memoryManager = nullptr;
uint32_t rootDeviceIndex = std::numeric_limits<uint32_t>::max();
};
typedef Test<GTPinFixture> GTPinTests;
@ -1279,7 +1281,7 @@ TEST_F(GTPinTests, givenInitializedGTPinInterfaceWhenKernelWithoutSSHIsUsedThenG
// Verify that when SSH is removed then during kernel execution
// GT-Pin Kernel Submit, Command Buffer Create and Command Buffer Complete callbacks are not called.
pKernel->resizeSurfaceStateHeap(nullptr, 0, 0, 0);
pKernel->resizeSurfaceStateHeap(rootDeviceIndex, nullptr, 0, 0, 0);
int prevCount2 = KernelSubmitCallbackCount;
int prevCount3 = CommandBufferCreateCallbackCount;
@ -1392,7 +1394,7 @@ TEST_F(GTPinTests, givenInitializedGTPinInterfaceWhenBlockedKernelWithoutSSHIsUs
// Verify that when SSH is removed then during kernel execution
// GT-Pin Kernel Submit, Command Buffer Create and Command Buffer Complete callbacks are not called.
pKernel->resizeSurfaceStateHeap(nullptr, 0, 0, 0);
pKernel->resizeSurfaceStateHeap(rootDeviceIndex, nullptr, 0, 0, 0);
cl_event userEvent = clCreateUserEvent(context, &retVal);
EXPECT_EQ(CL_SUCCESS, retVal);
@ -2167,15 +2169,15 @@ TEST_F(GTPinTests, givenParentKernelWhenGtPinAddingSurfaceStateThenItIsNotAddedA
std::unique_ptr<MockParentKernel> parentKernel(MockParentKernel::create(*pContext));
parentKernel->mockKernelInfo->usesSsh = true;
parentKernel->sshLocalSize = 64;
parentKernel->pSshLocal.reset(new char[64]);
parentKernel->kernelDeviceInfos[rootDeviceIndex].sshLocalSize = 64;
parentKernel->kernelDeviceInfos[rootDeviceIndex].pSshLocal.reset(new char[64]);
size_t sizeSurfaceStates1 = parentKernel->getSurfaceStateHeapSize();
size_t sizeSurfaceStates1 = parentKernel->getSurfaceStateHeapSize(rootDeviceIndex);
bool surfaceAdded = gtpinHelper.addSurfaceState(parentKernel.get());
bool surfaceAdded = gtpinHelper.addSurfaceState(parentKernel.get(), rootDeviceIndex);
EXPECT_FALSE(surfaceAdded);
size_t sizeSurfaceStates2 = parentKernel->getSurfaceStateHeapSize();
size_t sizeSurfaceStates2 = parentKernel->getSurfaceStateHeapSize(rootDeviceIndex);
EXPECT_EQ(sizeSurfaceStates2, sizeSurfaceStates1);
}
@ -2225,47 +2227,47 @@ TEST_F(GTPinTests, givenKernelWithSSHThenVerifyThatSSHResizeWorksWell) {
size_t numBTS1 = pKernel->getNumberOfBindingTableStates();
EXPECT_EQ(2u, numBTS1);
size_t sizeSurfaceStates1 = pKernel->getSurfaceStateHeapSize();
size_t sizeSurfaceStates1 = pKernel->getSurfaceStateHeapSize(rootDeviceIndex);
EXPECT_NE(0u, sizeSurfaceStates1);
size_t offsetBTS1 = pKernel->getBindingTableOffset();
EXPECT_NE(0u, offsetBTS1);
GFXCORE_FAMILY genFamily = pDevice->getHardwareInfo().platform.eRenderCoreFamily;
GTPinHwHelper &gtpinHelper = GTPinHwHelper::get(genFamily);
void *pSS1 = gtpinHelper.getSurfaceState(pKernel, 0);
void *pSS1 = gtpinHelper.getSurfaceState(pKernel, 0, rootDeviceIndex);
EXPECT_NE(nullptr, pSS1);
// Enlarge SSH by one SURFACE STATE element
bool surfaceAdded = gtpinHelper.addSurfaceState(pKernel);
bool surfaceAdded = gtpinHelper.addSurfaceState(pKernel, rootDeviceIndex);
EXPECT_TRUE(surfaceAdded);
size_t numBTS2 = pKernel->getNumberOfBindingTableStates();
EXPECT_EQ(numBTS1 + 1, numBTS2);
size_t sizeSurfaceStates2 = pKernel->getSurfaceStateHeapSize();
size_t sizeSurfaceStates2 = pKernel->getSurfaceStateHeapSize(rootDeviceIndex);
EXPECT_GT(sizeSurfaceStates2, sizeSurfaceStates1);
size_t offsetBTS2 = pKernel->getBindingTableOffset();
EXPECT_GT(offsetBTS2, offsetBTS1);
void *pSS2 = gtpinHelper.getSurfaceState(pKernel, 0);
void *pSS2 = gtpinHelper.getSurfaceState(pKernel, 0, rootDeviceIndex);
EXPECT_NE(pSS2, pSS1);
pSS2 = gtpinHelper.getSurfaceState(pKernel, numBTS2);
pSS2 = gtpinHelper.getSurfaceState(pKernel, numBTS2, rootDeviceIndex);
EXPECT_EQ(nullptr, pSS2);
// Remove kernel's SSH
pKernel->resizeSurfaceStateHeap(nullptr, 0, 0, 0);
pKernel->resizeSurfaceStateHeap(rootDeviceIndex, nullptr, 0, 0, 0);
// Try to enlarge SSH once again, this time the operation must fail
surfaceAdded = gtpinHelper.addSurfaceState(pKernel);
surfaceAdded = gtpinHelper.addSurfaceState(pKernel, rootDeviceIndex);
EXPECT_FALSE(surfaceAdded);
size_t numBTS3 = pKernel->getNumberOfBindingTableStates();
EXPECT_EQ(0u, numBTS3);
size_t sizeSurfaceStates3 = pKernel->getSurfaceStateHeapSize();
size_t sizeSurfaceStates3 = pKernel->getSurfaceStateHeapSize(rootDeviceIndex);
EXPECT_EQ(0u, sizeSurfaceStates3);
size_t offsetBTS3 = pKernel->getBindingTableOffset();
EXPECT_EQ(0u, offsetBTS3);
void *pSS3 = gtpinHelper.getSurfaceState(pKernel, 0);
void *pSS3 = gtpinHelper.getSurfaceState(pKernel, 0, rootDeviceIndex);
EXPECT_EQ(nullptr, pSS3);
// Cleanup
@ -2396,7 +2398,7 @@ TEST_F(GTPinTests, givenInitializedGTPinInterfaceWhenOnKernelSubitIsCalledThenCo
std::unique_ptr<MockCommandQueue> cmdQ(new MockCommandQueue(context.get(), pDevice, nullptr));
std::unique_ptr<MockKernel> pKernel(new MockKernel(pProgramm.get(), *pKernelInfo));
pKernel->setSshLocal(nullptr, sizeof(surfaceStateHeap));
pKernel->setSshLocal(nullptr, sizeof(surfaceStateHeap), rootDeviceIndex);
kernelOffset = 0x1234;
EXPECT_NE(pKernel->getStartOffset(), kernelOffset);

View File

@ -363,7 +363,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, HardwareCommandsTest, WhenAllocatingIndirectStateRes
auto usedAfterSSH = ssh.getUsed();
auto sizeRequiredDSH = HardwareCommandsHelper<FamilyType>::getSizeRequiredDSH(*kernel);
auto sizeRequiredIOH = HardwareCommandsHelper<FamilyType>::getSizeRequiredIOH(rootDeviceIndex, *kernel, localWorkSize);
auto sizeRequiredSSH = HardwareCommandsHelper<FamilyType>::getSizeRequiredSSH(*kernel);
auto sizeRequiredSSH = HardwareCommandsHelper<FamilyType>::getSizeRequiredSSH(*kernel, rootDeviceIndex);
EXPECT_GE(sizeRequiredDSH, usedAfterDSH - usedBeforeDSH);
EXPECT_GE(sizeRequiredIOH, usedAfterIOH - usedBeforeIOH);
@ -1005,7 +1005,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, HardwareCommandsTest, GivenKernelWithSamplersWhenInd
}
mockKernelWithInternal->mockKernel->setCrossThreadData(mockKernelWithInternal->crossThreadData, sizeof(mockKernelWithInternal->crossThreadData));
mockKernelWithInternal->mockKernel->setSshLocal(mockKernelWithInternal->sshLocal, sizeof(mockKernelWithInternal->sshLocal));
mockKernelWithInternal->mockKernel->setSshLocal(mockKernelWithInternal->sshLocal, sizeof(mockKernelWithInternal->sshLocal), rootDeviceIndex);
uint32_t interfaceDescriptorIndex = 0;
auto isCcsUsed = EngineHelpers::isCcs(cmdQ.getGpgpuEngine().osContext->getEngineType());
auto kernelUsesLocalIds = HardwareCommandsHelper<FamilyType>::kernelUsesLocalIds(*mockKernelWithInternal->mockKernel);
@ -1096,12 +1096,12 @@ HWCMDTEST_P(IGFX_GEN8_CORE, ParentKernelCommandsFromBinaryTest, WhenGettingSizeR
totalSize += maxBindingTableCount * sizeof(BINDING_TABLE_STATE) * DeviceQueue::interfaceDescriptorEntries;
auto &scheduler = pContext->getSchedulerKernel();
auto schedulerSshSize = scheduler.getSurfaceStateHeapSize();
auto schedulerSshSize = scheduler.getSurfaceStateHeapSize(rootDeviceIndex);
totalSize += schedulerSshSize + ((schedulerSshSize != 0) ? BINDING_TABLE_STATE::SURFACESTATEPOINTER_ALIGN_SIZE : 0);
totalSize = alignUp(totalSize, BINDING_TABLE_STATE::SURFACESTATEPOINTER_ALIGN_SIZE);
EXPECT_EQ(totalSize, HardwareCommandsHelper<FamilyType>::getSshSizeForExecutionModel(*pKernel));
EXPECT_EQ(totalSize, HardwareCommandsHelper<FamilyType>::getSshSizeForExecutionModel(*pKernel, rootDeviceIndex));
}
static const char *binaryFile = "simple_block_kernel";

View File

@ -46,7 +46,7 @@ struct HardwareCommandsTest : ClDeviceFixture,
template <typename GfxFamily>
size_t pushBindingTableAndSurfaceStates(IndirectHeap &dstHeap, const Kernel &srcKernel) {
return EncodeSurfaceState<GfxFamily>::pushBindingTableAndSurfaceStates(dstHeap, (srcKernel.getKernelInfo().patchInfo.bindingTableState != nullptr) ? srcKernel.getKernelInfo().patchInfo.bindingTableState->Count : 0,
srcKernel.getSurfaceStateHeap(), srcKernel.getSurfaceStateHeapSize(),
srcKernel.getSurfaceStateHeap(rootDeviceIndex), srcKernel.getSurfaceStateHeapSize(rootDeviceIndex),
srcKernel.getNumberOfBindingTableStates(), srcKernel.getBindingTableOffset());
}
};

View File

@ -58,7 +58,7 @@ TEST_F(KernelArgBufferTest, GivenSvmPtrStatelessWhenSettingKernelArgThenArgument
EXPECT_EQ(CL_SUCCESS, retVal);
EXPECT_FALSE(pKernel->requiresCoherency());
EXPECT_EQ(0u, pKernel->getSurfaceStateHeapSize());
EXPECT_EQ(0u, pKernel->getSurfaceStateHeapSize(rootDeviceIndex));
delete buffer;
}
@ -76,11 +76,11 @@ HWTEST_F(KernelArgBufferTest, GivenSvmPtrStatefulWhenSettingKernelArgThenArgumen
EXPECT_EQ(CL_SUCCESS, retVal);
EXPECT_FALSE(pKernel->requiresCoherency());
EXPECT_NE(0u, pKernel->getSurfaceStateHeapSize());
EXPECT_NE(0u, pKernel->getSurfaceStateHeapSize(rootDeviceIndex));
typedef typename FamilyType::RENDER_SURFACE_STATE RENDER_SURFACE_STATE;
auto surfaceState = reinterpret_cast<const RENDER_SURFACE_STATE *>(
ptrOffset(pKernel->getSurfaceStateHeap(), pKernelInfo->kernelArgInfo[0].offsetHeap));
ptrOffset(pKernel->getSurfaceStateHeap(rootDeviceIndex), pKernelInfo->kernelArgInfo[0].offsetHeap));
auto surfaceAddress = surfaceState->getSurfaceBaseAddress();
EXPECT_EQ(buffer->getGraphicsAllocation(mockRootDeviceIndex)->getGpuAddress(), surfaceAddress);

View File

@ -110,7 +110,7 @@ TEST_F(KernelArgPipeTest, GivenSvmPtrStatelessWhenSettingKernelArgThenArgumentsA
auto retVal = this->pKernel->setArg(0, sizeof(cl_mem *), pVal);
EXPECT_EQ(CL_SUCCESS, retVal);
EXPECT_EQ(0u, pKernel->getSurfaceStateHeapSize());
EXPECT_EQ(0u, pKernel->getSurfaceStateHeapSize(rootDeviceIndex));
delete pipe;
}
@ -127,11 +127,11 @@ HWTEST_F(KernelArgPipeTest, GivenSvmPtrStatefulWhenSettingKernelArgThenArguments
auto retVal = this->pKernel->setArg(0, sizeof(cl_mem *), pVal);
EXPECT_EQ(CL_SUCCESS, retVal);
EXPECT_NE(0u, pKernel->getSurfaceStateHeapSize());
EXPECT_NE(0u, pKernel->getSurfaceStateHeapSize(rootDeviceIndex));
typedef typename FamilyType::RENDER_SURFACE_STATE RENDER_SURFACE_STATE;
auto surfaceState = reinterpret_cast<const RENDER_SURFACE_STATE *>(
ptrOffset(pKernel->getSurfaceStateHeap(),
ptrOffset(pKernel->getSurfaceStateHeap(rootDeviceIndex),
pKernelInfo->kernelArgInfo[0].offsetHeap));
void *surfaceAddress = reinterpret_cast<void *>(surfaceState->getSurfaceBaseAddress());

View File

@ -101,7 +101,7 @@ TEST_F(KernelArgSvmTest, GivenSvmPtrStatelessWhenSettingKernelArgThenArgumentsAr
auto retVal = pKernel->setArgSvm(0, 256, svmPtr, nullptr, 0u);
EXPECT_EQ(CL_SUCCESS, retVal);
EXPECT_EQ(0u, pKernel->getSurfaceStateHeapSize());
EXPECT_EQ(0u, pKernel->getSurfaceStateHeapSize(rootDeviceIndex));
delete[] svmPtr;
}
@ -115,11 +115,11 @@ HWTEST_F(KernelArgSvmTest, GivenSvmPtrStatefulWhenSettingKernelArgThenArgumentsA
auto retVal = pKernel->setArgSvm(0, 256, svmPtr, nullptr, 0u);
EXPECT_EQ(CL_SUCCESS, retVal);
EXPECT_NE(0u, pKernel->getSurfaceStateHeapSize());
EXPECT_NE(0u, pKernel->getSurfaceStateHeapSize(rootDeviceIndex));
typedef typename FamilyType::RENDER_SURFACE_STATE RENDER_SURFACE_STATE;
auto surfaceState = reinterpret_cast<const RENDER_SURFACE_STATE *>(
ptrOffset(pKernel->getSurfaceStateHeap(),
ptrOffset(pKernel->getSurfaceStateHeap(rootDeviceIndex),
pKernelInfo->kernelArgInfo[0].offsetHeap));
void *surfaceAddress = reinterpret_cast<void *>(surfaceState->getSurfaceBaseAddress());
@ -154,7 +154,7 @@ TEST_F(KernelArgSvmTest, GivenValidSvmAllocStatelessWhenSettingKernelArgThenArgu
auto retVal = pKernel->setArgSvmAlloc(0, svmPtr, &svmAlloc);
EXPECT_EQ(CL_SUCCESS, retVal);
EXPECT_EQ(0u, pKernel->getSurfaceStateHeapSize());
EXPECT_EQ(0u, pKernel->getSurfaceStateHeapSize(rootDeviceIndex));
delete[] svmPtr;
}
@ -170,11 +170,11 @@ HWTEST_F(KernelArgSvmTest, GivenValidSvmAllocStatefulWhenSettingKernelArgThenArg
auto retVal = pKernel->setArgSvmAlloc(0, svmPtr, &svmAlloc);
EXPECT_EQ(CL_SUCCESS, retVal);
EXPECT_NE(0u, pKernel->getSurfaceStateHeapSize());
EXPECT_NE(0u, pKernel->getSurfaceStateHeapSize(rootDeviceIndex));
typedef typename FamilyType::RENDER_SURFACE_STATE RENDER_SURFACE_STATE;
auto surfaceState = reinterpret_cast<const RENDER_SURFACE_STATE *>(
ptrOffset(pKernel->getSurfaceStateHeap(),
ptrOffset(pKernel->getSurfaceStateHeap(rootDeviceIndex),
pKernelInfo->kernelArgInfo[0].offsetHeap));
void *surfaceAddress = reinterpret_cast<void *>(surfaceState->getSurfaceBaseAddress());
@ -196,7 +196,7 @@ HWTEST_F(KernelArgSvmTest, givenOffsetedSvmPointerWhenSetArgSvmAllocIsCalledThen
typedef typename FamilyType::RENDER_SURFACE_STATE RENDER_SURFACE_STATE;
auto surfaceState = reinterpret_cast<const RENDER_SURFACE_STATE *>(
ptrOffset(pKernel->getSurfaceStateHeap(),
ptrOffset(pKernel->getSurfaceStateHeap(rootDeviceIndex),
pKernelInfo->kernelArgInfo[0].offsetHeap));
void *surfaceAddress = reinterpret_cast<void *>(surfaceState->getSurfaceBaseAddress());
@ -214,7 +214,7 @@ HWTEST_F(KernelArgSvmTest, givenDeviceSupportingSharedSystemAllocationsWhenSetAr
typedef typename FamilyType::RENDER_SURFACE_STATE RENDER_SURFACE_STATE;
auto surfaceState = reinterpret_cast<const RENDER_SURFACE_STATE *>(
ptrOffset(pKernel->getSurfaceStateHeap(),
ptrOffset(pKernel->getSurfaceStateHeap(rootDeviceIndex),
pKernelInfo->kernelArgInfo[0].offsetHeap));
void *surfaceAddress = reinterpret_cast<void *>(surfaceState->getSurfaceBaseAddress());
@ -237,7 +237,7 @@ HWTEST_F(KernelArgSvmTest, WhenPatchingWithImplicitSurfaceThenPatchIsApplied) {
svmPtr.resize(256);
pKernel->setCrossThreadData(nullptr, sizeof(void *));
pKernel->setSshLocal(nullptr, rendSurfSize);
pKernel->setSshLocal(nullptr, rendSurfSize, rootDeviceIndex);
pKernelInfo->requiresSshForBuffers = true;
pKernelInfo->usesSsh = true;
{
@ -254,8 +254,8 @@ HWTEST_F(KernelArgSvmTest, WhenPatchingWithImplicitSurfaceThenPatchIsApplied) {
ASSERT_GE(pKernel->getCrossThreadDataSize(rootDeviceIndex), sizeof(void *));
*reinterpret_cast<void **>(pKernel->getCrossThreadData(rootDeviceIndex)) = 0U;
ASSERT_GE(pKernel->getSurfaceStateHeapSize(), rendSurfSize);
RENDER_SURFACE_STATE *surfState = reinterpret_cast<RENDER_SURFACE_STATE *>(pKernel->getSurfaceStateHeap());
ASSERT_GE(pKernel->getSurfaceStateHeapSize(rootDeviceIndex), rendSurfSize);
RENDER_SURFACE_STATE *surfState = reinterpret_cast<RENDER_SURFACE_STATE *>(pKernel->getSurfaceStateHeap(rootDeviceIndex));
memset(surfState, 0, rendSurfSize);
pKernel->patchWithImplicitSurface(ptrToPatch, svmAlloc, patch);
@ -278,7 +278,7 @@ HWTEST_F(KernelArgSvmTest, WhenPatchingWithImplicitSurfaceThenPatchIsApplied) {
// when cross thread and ssh data is not available then should not do anything
pKernel->setCrossThreadData(nullptr, 0);
pKernel->setSshLocal(nullptr, 0);
pKernel->setSshLocal(nullptr, 0, rootDeviceIndex);
pKernel->patchWithImplicitSurface(ptrToPatch, svmAlloc, patch);
}
}
@ -389,7 +389,7 @@ HWTEST_TYPED_TEST(KernelArgSvmTestTyped, GivenBufferKernelArgWhenBufferOffsetIsN
kai.offsetBufferOffset = kai.kernelArgPatchInfoVector[0].size;
this->pKernel->setCrossThreadData(nullptr, kai.offsetBufferOffset + sizeof(uint32_t));
this->pKernel->setSshLocal(nullptr, rendSurfSize);
this->pKernel->setSshLocal(nullptr, rendSurfSize, rootDeviceIndex);
this->pKernelInfo->requiresSshForBuffers = true;
this->pKernelInfo->usesSsh = true;
{
@ -405,8 +405,8 @@ HWTEST_TYPED_TEST(KernelArgSvmTestTyped, GivenBufferKernelArgWhenBufferOffsetIsN
*expectedPointerPatchPtr = reinterpret_cast<void *>(0U);
*expectedOffsetPatchPtr = 0U;
ASSERT_GE(this->pKernel->getSurfaceStateHeapSize(), rendSurfSize);
RENDER_SURFACE_STATE *surfState = reinterpret_cast<RENDER_SURFACE_STATE *>(this->pKernel->getSurfaceStateHeap());
ASSERT_GE(this->pKernel->getSurfaceStateHeapSize(rootDeviceIndex), rendSurfSize);
RENDER_SURFACE_STATE *surfState = reinterpret_cast<RENDER_SURFACE_STATE *>(this->pKernel->getSurfaceStateHeap(rootDeviceIndex));
memset(surfState, 0, rendSurfSize);
TypeParam::setArg(*this->pKernel, 0U, ptrToPatch, sizeToPatch, svmAlloc);

View File

@ -736,13 +736,13 @@ HWTEST_F(KernelPrivateSurfaceTest, givenStatefulKernelWhenKernelIsCreatedThenPri
ASSERT_EQ(CL_SUCCESS, pKernel->initialize());
EXPECT_NE(0u, pKernel->getSurfaceStateHeapSize());
EXPECT_NE(0u, pKernel->getSurfaceStateHeapSize(rootDeviceIndex));
auto bufferAddress = pKernel->kernelDeviceInfos[pDevice->getRootDeviceIndex()].privateSurface->getGpuAddress();
typedef typename FamilyType::RENDER_SURFACE_STATE RENDER_SURFACE_STATE;
auto surfaceState = reinterpret_cast<const RENDER_SURFACE_STATE *>(
ptrOffset(pKernel->getSurfaceStateHeap(),
ptrOffset(pKernel->getSurfaceStateHeap(rootDeviceIndex),
pKernelInfo->patchInfo.pAllocateStatelessPrivateSurface->SurfaceStateHeapOffset));
auto surfaceAddress = surfaceState->getSurfaceBaseAddress();
@ -766,7 +766,7 @@ TEST_F(KernelPrivateSurfaceTest, givenStatelessKernelWhenKernelIsCreatedThenPriv
char buffer[16];
MockGraphicsAllocation gfxAlloc(buffer, sizeof(buffer));
MockContext context;
MockContext context(pClDevice);
MockProgram program(&context, false, toClDeviceVector(*pClDevice));
program.setConstantSurface(&gfxAlloc);
@ -779,8 +779,8 @@ TEST_F(KernelPrivateSurfaceTest, givenStatelessKernelWhenKernelIsCreatedThenPriv
ASSERT_EQ(CL_SUCCESS, pKernel->initialize());
EXPECT_EQ(0u, pKernel->getSurfaceStateHeapSize());
EXPECT_EQ(nullptr, pKernel->getSurfaceStateHeap());
EXPECT_EQ(0u, pKernel->getSurfaceStateHeapSize(rootDeviceIndex));
EXPECT_EQ(nullptr, pKernel->getSurfaceStateHeap(rootDeviceIndex));
program.setConstantSurface(nullptr);
delete pKernel;
@ -984,11 +984,11 @@ HWTEST_F(KernelGlobalSurfaceTest, givenStatefulKernelWhenKernelIsCreatedThenGlob
ASSERT_EQ(CL_SUCCESS, pKernel->initialize());
EXPECT_NE(0u, pKernel->getSurfaceStateHeapSize());
EXPECT_NE(0u, pKernel->getSurfaceStateHeapSize(rootDeviceIndex));
typedef typename FamilyType::RENDER_SURFACE_STATE RENDER_SURFACE_STATE;
auto surfaceState = reinterpret_cast<const RENDER_SURFACE_STATE *>(
ptrOffset(pKernel->getSurfaceStateHeap(),
ptrOffset(pKernel->getSurfaceStateHeap(rootDeviceIndex),
pKernelInfo->patchInfo.pAllocateStatelessGlobalMemorySurfaceWithInitialization->SurfaceStateHeapOffset));
auto surfaceAddress = surfaceState->getSurfaceBaseAddress();
@ -1025,8 +1025,8 @@ TEST_F(KernelGlobalSurfaceTest, givenStatelessKernelWhenKernelIsCreatedThenGloba
ASSERT_EQ(CL_SUCCESS, pKernel->initialize());
EXPECT_EQ(0u, pKernel->getSurfaceStateHeapSize());
EXPECT_EQ(nullptr, pKernel->getSurfaceStateHeap());
EXPECT_EQ(0u, pKernel->getSurfaceStateHeapSize(rootDeviceIndex));
EXPECT_EQ(nullptr, pKernel->getSurfaceStateHeap(rootDeviceIndex));
program.setGlobalSurface(nullptr);
delete pKernel;
@ -1156,11 +1156,11 @@ HWTEST_F(KernelConstantSurfaceTest, givenStatefulKernelWhenKernelIsCreatedThenCo
ASSERT_EQ(CL_SUCCESS, pKernel->initialize());
EXPECT_NE(0u, pKernel->getSurfaceStateHeapSize());
EXPECT_NE(0u, pKernel->getSurfaceStateHeapSize(rootDeviceIndex));
typedef typename FamilyType::RENDER_SURFACE_STATE RENDER_SURFACE_STATE;
auto surfaceState = reinterpret_cast<const RENDER_SURFACE_STATE *>(
ptrOffset(pKernel->getSurfaceStateHeap(),
ptrOffset(pKernel->getSurfaceStateHeap(rootDeviceIndex),
pKernelInfo->patchInfo.pAllocateStatelessConstantMemorySurfaceWithInitialization->SurfaceStateHeapOffset));
auto surfaceAddress = surfaceState->getSurfaceBaseAddress();
@ -1197,8 +1197,8 @@ TEST_F(KernelConstantSurfaceTest, givenStatelessKernelWhenKernelIsCreatedThenCon
ASSERT_EQ(CL_SUCCESS, pKernel->initialize());
EXPECT_EQ(0u, pKernel->getSurfaceStateHeapSize());
EXPECT_EQ(nullptr, pKernel->getSurfaceStateHeap());
EXPECT_EQ(0u, pKernel->getSurfaceStateHeapSize(rootDeviceIndex));
EXPECT_EQ(nullptr, pKernel->getSurfaceStateHeap(rootDeviceIndex));
program.setConstantSurface(nullptr);
delete pKernel;
@ -1238,11 +1238,11 @@ HWCMDTEST_F(IGFX_GEN8_CORE, KernelEventPoolSurfaceTest, givenStatefulKernelWhenK
ASSERT_EQ(CL_SUCCESS, pKernel->initialize());
EXPECT_NE(0u, pKernel->getSurfaceStateHeapSize());
EXPECT_NE(0u, pKernel->getSurfaceStateHeapSize(rootDeviceIndex));
typedef typename FamilyType::RENDER_SURFACE_STATE RENDER_SURFACE_STATE;
auto surfaceState = reinterpret_cast<const RENDER_SURFACE_STATE *>(
ptrOffset(pKernel->getSurfaceStateHeap(),
ptrOffset(pKernel->getSurfaceStateHeap(rootDeviceIndex),
pKernelInfo->patchInfo.pAllocateStatelessEventPoolSurface->SurfaceStateHeapOffset));
auto surfaceAddress = surfaceState->getSurfaceBaseAddress();
@ -1291,7 +1291,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, KernelEventPoolSurfaceTest, givenStatefulKernelWhenE
typedef typename FamilyType::RENDER_SURFACE_STATE RENDER_SURFACE_STATE;
auto surfaceState = reinterpret_cast<const RENDER_SURFACE_STATE *>(
ptrOffset(pKernel->getSurfaceStateHeap(),
ptrOffset(pKernel->getSurfaceStateHeap(rootDeviceIndex),
pKernelInfo->patchInfo.pAllocateStatelessEventPoolSurface->SurfaceStateHeapOffset));
auto surfaceAddress = surfaceState->getSurfaceBaseAddress();
@ -1363,7 +1363,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, KernelEventPoolSurfaceTest, givenStatelessKernelWhen
ASSERT_EQ(CL_SUCCESS, pKernel->initialize());
if (pClDevice->areOcl21FeaturesSupported() == false) {
EXPECT_EQ(0u, pKernel->getSurfaceStateHeapSize());
EXPECT_EQ(0u, pKernel->getSurfaceStateHeapSize(rootDeviceIndex));
} else {
}
@ -1442,11 +1442,11 @@ HWCMDTEST_F(IGFX_GEN8_CORE, KernelDefaultDeviceQueueSurfaceTest, givenStatefulKe
ASSERT_EQ(CL_SUCCESS, pKernel->initialize());
EXPECT_NE(0u, pKernel->getSurfaceStateHeapSize());
EXPECT_NE(0u, pKernel->getSurfaceStateHeapSize(rootDeviceIndex));
typedef typename FamilyType::RENDER_SURFACE_STATE RENDER_SURFACE_STATE;
auto surfaceState = reinterpret_cast<const RENDER_SURFACE_STATE *>(
ptrOffset(pKernel->getSurfaceStateHeap(),
ptrOffset(pKernel->getSurfaceStateHeap(rootDeviceIndex),
pKernelInfo->patchInfo.pAllocateStatelessDefaultDeviceQueueSurface->SurfaceStateHeapOffset));
auto surfaceAddress = surfaceState->getSurfaceBaseAddress();
@ -1493,11 +1493,11 @@ HWCMDTEST_F(IGFX_GEN8_CORE, KernelDefaultDeviceQueueSurfaceTest, givenStatefulKe
pKernel->patchDefaultDeviceQueue(pDevQueue);
EXPECT_NE(0u, pKernel->getSurfaceStateHeapSize());
EXPECT_NE(0u, pKernel->getSurfaceStateHeapSize(rootDeviceIndex));
typedef typename FamilyType::RENDER_SURFACE_STATE RENDER_SURFACE_STATE;
auto surfaceState = reinterpret_cast<const RENDER_SURFACE_STATE *>(
ptrOffset(pKernel->getSurfaceStateHeap(),
ptrOffset(pKernel->getSurfaceStateHeap(rootDeviceIndex),
pKernelInfo->patchInfo.pAllocateStatelessDefaultDeviceQueueSurface->SurfaceStateHeapOffset));
auto surfaceAddress = surfaceState->getSurfaceBaseAddress();
@ -1537,7 +1537,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, KernelDefaultDeviceQueueSurfaceTest, givenStatelessK
ASSERT_EQ(CL_SUCCESS, pKernel->initialize());
EXPECT_EQ(0u, pKernel->getSurfaceStateHeapSize());
EXPECT_EQ(0u, pKernel->getSurfaceStateHeapSize(rootDeviceIndex));
delete pKernel;
}

View File

@ -21,6 +21,7 @@ using namespace NEO;
class KernelTransformableTest : public ::testing::Test {
public:
void SetUp() override {
rootDeviceIndex = context.getDevice(0)->getRootDeviceIndex();
pKernelInfo = std::make_unique<KernelInfo>();
KernelArgPatchInfo kernelArgPatchInfo;
@ -74,6 +75,7 @@ class KernelTransformableTest : public ::testing::Test {
std::unique_ptr<Image> image;
SKernelBinaryHeaderCommon kernelHeader;
char surfaceStateHeap[0x80];
uint32_t rootDeviceIndex = std::numeric_limits<uint32_t>::max();
};
HWTEST_F(KernelTransformableTest, givenKernelThatCannotTranformImagesWithTwoTransformableImagesAndTwoTransformableSamplersWhenAllArgsAreSetThenImagesAreNotTransformed) {
@ -93,7 +95,7 @@ HWTEST_F(KernelTransformableTest, givenKernelThatCannotTranformImagesWithTwoTran
pKernel->setArg(2, sizeof(clImage), &clImage);
pKernel->setArg(3, sizeof(clImage), &clImage);
auto ssh = pKernel->getSurfaceStateHeap();
auto ssh = pKernel->getSurfaceStateHeap(rootDeviceIndex);
auto firstSurfaceState = reinterpret_cast<RENDER_SURFACE_STATE *>(ptrOffset(ssh, firstImageOffset));
EXPECT_EQ(SURFACE_TYPE::SURFACE_TYPE_SURFTYPE_3D, firstSurfaceState->getSurfaceType());
@ -120,7 +122,7 @@ HWTEST_F(KernelTransformableTest, givenKernelWithTwoTransformableImagesAndTwoTra
pKernel->setArg(2, sizeof(clImage), &clImage);
pKernel->setArg(3, sizeof(clImage), &clImage);
auto ssh = pKernel->getSurfaceStateHeap();
auto ssh = pKernel->getSurfaceStateHeap(rootDeviceIndex);
auto firstSurfaceState = reinterpret_cast<RENDER_SURFACE_STATE *>(ptrOffset(ssh, firstImageOffset));
EXPECT_EQ(SURFACE_TYPE::SURFACE_TYPE_SURFTYPE_2D, firstSurfaceState->getSurfaceType());
@ -147,7 +149,7 @@ HWTEST_F(KernelTransformableTest, givenKernelWithTwoTransformableImagesAndTwoTra
pKernel->setArg(2, sizeof(clImage), &clImage);
pKernel->setArg(3, sizeof(clImage), &clImage);
auto ssh = pKernel->getSurfaceStateHeap();
auto ssh = pKernel->getSurfaceStateHeap(rootDeviceIndex);
auto firstSurfaceState = reinterpret_cast<RENDER_SURFACE_STATE *>(ptrOffset(ssh, firstImageOffset));
auto secondSurfaceState = reinterpret_cast<RENDER_SURFACE_STATE *>(ptrOffset(ssh, secondImageOffset));
@ -179,7 +181,7 @@ HWTEST_F(KernelTransformableTest, givenKernelWithOneTransformableImageAndTwoTran
pKernel->setArg(2, sizeof(clImage), &clImage);
pKernel->setArg(3, sizeof(clImage), &clImage);
auto ssh = pKernel->getSurfaceStateHeap();
auto ssh = pKernel->getSurfaceStateHeap(rootDeviceIndex);
auto firstSurfaceState = reinterpret_cast<RENDER_SURFACE_STATE *>(ptrOffset(ssh, firstImageOffset));
auto secondSurfaceState = reinterpret_cast<RENDER_SURFACE_STATE *>(ptrOffset(ssh, secondImageOffset));
@ -201,7 +203,7 @@ HWTEST_F(KernelTransformableTest, givenKernelWithImages2dAndTwoTransformableSamp
pKernelInfo->kernelArgInfo[2].isTransformable = true;
pKernelInfo->kernelArgInfo[3].isTransformable = true;
auto ssh = pKernel->getSurfaceStateHeap();
auto ssh = pKernel->getSurfaceStateHeap(rootDeviceIndex);
auto firstSurfaceState = reinterpret_cast<RENDER_SURFACE_STATE *>(ptrOffset(ssh, firstImageOffset));
auto secondSurfaceState = reinterpret_cast<RENDER_SURFACE_STATE *>(ptrOffset(ssh, secondImageOffset));
@ -233,7 +235,7 @@ HWTEST_F(KernelTransformableTest, givenKernelWithTwoTransformableImagesAndTwoTra
pKernel->setArg(2, sizeof(clImage), &clImage);
pKernel->setArg(3, sizeof(clImage), &clImage);
auto ssh = pKernel->getSurfaceStateHeap();
auto ssh = pKernel->getSurfaceStateHeap(rootDeviceIndex);
auto firstSurfaceState = reinterpret_cast<RENDER_SURFACE_STATE *>(ptrOffset(ssh, firstImageOffset));
auto secondSurfaceState = reinterpret_cast<RENDER_SURFACE_STATE *>(ptrOffset(ssh, secondImageOffset));
@ -265,7 +267,7 @@ HWTEST_F(KernelTransformableTest, givenKernelWithNonTransformableSamplersWhenRes
pKernel->setArg(2, sizeof(clImage), &clImage);
pKernel->setArg(3, sizeof(clImage), &clImage);
auto ssh = pKernel->getSurfaceStateHeap();
auto ssh = pKernel->getSurfaceStateHeap(rootDeviceIndex);
auto firstSurfaceState = reinterpret_cast<RENDER_SURFACE_STATE *>(ptrOffset(ssh, firstImageOffset));
auto secondSurfaceState = reinterpret_cast<RENDER_SURFACE_STATE *>(ptrOffset(ssh, secondImageOffset));
@ -303,7 +305,7 @@ HWTEST_F(KernelTransformableTest, givenKernelWithoutSamplersAndTransformableImag
pKernel->setArg(2, sizeof(clImage), &clImage);
pKernel->setArg(3, sizeof(clImage), &clImage);
auto ssh = pKernel->getSurfaceStateHeap();
auto ssh = pKernel->getSurfaceStateHeap(rootDeviceIndex);
auto firstSurfaceState = reinterpret_cast<RENDER_SURFACE_STATE *>(ptrOffset(ssh, firstImageOffset));
auto secondSurfaceState = reinterpret_cast<RENDER_SURFACE_STATE *>(ptrOffset(ssh, secondImageOffset));

View File

@ -125,7 +125,7 @@ HWTEST_F(BufferSetArgTest, givenSetArgBufferWhenNullArgStatefulThenProgramNullSu
using SURFACE_FORMAT = typename RENDER_SURFACE_STATE::SURFACE_FORMAT;
auto surfaceState = reinterpret_cast<const RENDER_SURFACE_STATE *>(
ptrOffset(pKernel->getSurfaceStateHeap(),
ptrOffset(pKernel->getSurfaceStateHeap(rootDeviceIndex),
pKernelInfo->kernelArgInfo[0].offsetHeap));
pKernelInfo->requiresSshForBuffers = true;
@ -145,7 +145,7 @@ HWTEST_F(BufferSetArgTest, givenSetKernelArgOnReadOnlyBufferThatIsMisalingedWhen
using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE;
auto surfaceState = reinterpret_cast<const RENDER_SURFACE_STATE *>(
ptrOffset(pKernel->getSurfaceStateHeap(),
ptrOffset(pKernel->getSurfaceStateHeap(rootDeviceIndex),
pKernelInfo->kernelArgInfo[0].offsetHeap));
pKernelInfo->requiresSshForBuffers = true;
@ -186,7 +186,7 @@ HWTEST_F(BufferSetArgTest, givenSetArgBufferWithNullArgStatelessThenDontProgramN
HWTEST_F(BufferSetArgTest, givenNonPureStatefulArgWhenRenderCompressedBufferIsSetThenSetNonAuxMode) {
using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE;
auto surfaceState = reinterpret_cast<RENDER_SURFACE_STATE *>(ptrOffset(pKernel->getSurfaceStateHeap(), pKernelInfo->kernelArgInfo[0].offsetHeap));
auto surfaceState = reinterpret_cast<RENDER_SURFACE_STATE *>(ptrOffset(pKernel->getSurfaceStateHeap(rootDeviceIndex), pKernelInfo->kernelArgInfo[0].offsetHeap));
auto graphicsAllocation = buffer->getGraphicsAllocation(pClDevice->getRootDeviceIndex());
graphicsAllocation->setAllocationType(GraphicsAllocation::AllocationType::BUFFER_COMPRESSED);
graphicsAllocation->setDefaultGmm(new Gmm(pDevice->getGmmClientContext(), graphicsAllocation->getUnderlyingBuffer(), buffer->getSize(), false));

View File

@ -120,7 +120,7 @@ HWTEST_F(ImageSetArgTest, WhenSettingKernelArgImageThenSurfaceBaseAddressIsSetCo
typedef typename FamilyType::RENDER_SURFACE_STATE RENDER_SURFACE_STATE;
auto surfaceState = reinterpret_cast<const RENDER_SURFACE_STATE *>(
ptrOffset(pKernel->getSurfaceStateHeap(),
ptrOffset(pKernel->getSurfaceStateHeap(rootDeviceIndex),
pKernelInfo->kernelArgInfo[0].offsetHeap));
srcImage->setImageArg(const_cast<RENDER_SURFACE_STATE *>(surfaceState), false, 0, pClDevice->getRootDeviceIndex());
@ -195,7 +195,7 @@ HWTEST_F(ImageSetArgTest, givenCubeMapIndexWhenSetKernelArgImageIsCalledThenModi
src2dImage->setCubeFaceIndex(cubeFaceIndex);
auto surfaceState = reinterpret_cast<const RENDER_SURFACE_STATE *>(
ptrOffset(pKernel->getSurfaceStateHeap(),
ptrOffset(pKernel->getSurfaceStateHeap(rootDeviceIndex),
pKernelInfo->kernelArgInfo[0].offsetHeap));
src2dImage->setImageArg(const_cast<RENDER_SURFACE_STATE *>(surfaceState), false, 0, pClDevice->getRootDeviceIndex());
@ -298,7 +298,7 @@ HWTEST_F(ImageSetArgTest, givenNonCubeMapIndexWhenSetKernelArgImageIsCalledThenD
typedef typename FamilyType::RENDER_SURFACE_STATE RENDER_SURFACE_STATE;
auto surfaceState = reinterpret_cast<const RENDER_SURFACE_STATE *>(
ptrOffset(pKernel->getSurfaceStateHeap(),
ptrOffset(pKernel->getSurfaceStateHeap(rootDeviceIndex),
pKernelInfo->kernelArgInfo[0].offsetHeap));
EXPECT_EQ(srcImage->getCubeFaceIndex(), __GMM_NO_CUBE_MAP);
@ -327,7 +327,7 @@ HWTEST_F(ImageSetArgTest, givenOffsetedBufferWhenSetKernelArgImageIscalledThenFu
typedef typename FamilyType::RENDER_SURFACE_STATE RENDER_SURFACE_STATE;
auto surfaceState = reinterpret_cast<const RENDER_SURFACE_STATE *>(
ptrOffset(pKernel->getSurfaceStateHeap(),
ptrOffset(pKernel->getSurfaceStateHeap(rootDeviceIndex),
pKernelInfo->kernelArgInfo[0].offsetHeap));
auto graphicsAllocation = srcAllocation;
@ -357,7 +357,7 @@ HWTEST_F(ImageSetArgTest, WhenSettingKernelArgThenPropertiesAreSetCorrectly) {
ASSERT_EQ(CL_SUCCESS, retVal);
auto surfaceState = reinterpret_cast<const RENDER_SURFACE_STATE *>(
ptrOffset(pKernel->getSurfaceStateHeap(),
ptrOffset(pKernel->getSurfaceStateHeap(rootDeviceIndex),
pKernelInfo->kernelArgInfo[0].offsetHeap));
size_t rPitch = srcImage->getImageDesc().image_row_pitch;
@ -408,7 +408,7 @@ HWTEST_F(ImageSetArgTest, givenImage2DWithMipMapsWhenSetKernelArgIsCalledThenMip
ASSERT_EQ(CL_SUCCESS, retVal);
auto surfaceState = reinterpret_cast<const RENDER_SURFACE_STATE *>(
ptrOffset(pKernel->getSurfaceStateHeap(),
ptrOffset(pKernel->getSurfaceStateHeap(rootDeviceIndex),
pKernelInfo->kernelArgInfo[0].offsetHeap));
EXPECT_EQ((uint32_t)mipLevel, surfaceState->getSurfaceMinLod());
EXPECT_EQ((uint32_t)mipCount, surfaceState->getMipCountLod() + 1);
@ -429,7 +429,7 @@ HWTEST_F(ImageSetArgTest, Given2dArrayWhenSettingKernelArgThenPropertiesAreSetCo
ASSERT_EQ(CL_SUCCESS, retVal);
auto surfaceState = reinterpret_cast<const RENDER_SURFACE_STATE *>(
ptrOffset(pKernel->getSurfaceStateHeap(),
ptrOffset(pKernel->getSurfaceStateHeap(rootDeviceIndex),
pKernelInfo->kernelArgInfo[0].offsetHeap));
auto surfaceAddress = surfaceState->getSurfaceBaseAddress();
@ -477,7 +477,7 @@ HWTEST_F(ImageSetArgTest, Given1dArrayWhenSettingKernelArgThenPropertiesAreSetCo
ASSERT_EQ(CL_SUCCESS, retVal);
auto surfaceState = reinterpret_cast<const RENDER_SURFACE_STATE *>(
ptrOffset(pKernel->getSurfaceStateHeap(),
ptrOffset(pKernel->getSurfaceStateHeap(rootDeviceIndex),
pKernelInfo->kernelArgInfo[0].offsetHeap));
auto surfaceAddress = surfaceState->getSurfaceBaseAddress();
@ -533,7 +533,7 @@ HWTEST_F(ImageSetArgTest, givenMcsAllocationWhenSetArgIsCalledWithoutUnifiedAuxC
ASSERT_EQ(CL_SUCCESS, retVal);
auto surfaceState = reinterpret_cast<const RENDER_SURFACE_STATE *>(
ptrOffset(pKernel->getSurfaceStateHeap(),
ptrOffset(pKernel->getSurfaceStateHeap(rootDeviceIndex),
pKernelInfo->kernelArgInfo[0].offsetHeap));
EXPECT_FALSE(Image::isDepthFormat(image->getImageFormat()));
@ -569,7 +569,7 @@ HWTEST_F(ImageSetArgTest, givenDepthFormatWhenSetArgIsCalledThenProgramAuxFields
ASSERT_EQ(CL_SUCCESS, retVal);
auto surfaceState = reinterpret_cast<const RENDER_SURFACE_STATE *>(
ptrOffset(pKernel->getSurfaceStateHeap(),
ptrOffset(pKernel->getSurfaceStateHeap(rootDeviceIndex),
pKernelInfo->kernelArgInfo[0].offsetHeap));
EXPECT_TRUE(Image::isDepthFormat(image->getImageFormat()));
@ -600,7 +600,7 @@ HWTEST_F(ImageSetArgTest, givenMultisampledR32Floatx8x24DepthStencilFormatWhenSe
retVal = clSetKernelArg(pKernel, 0, sizeof(memObj), &memObj);
ASSERT_EQ(CL_SUCCESS, retVal);
auto surfaceState = reinterpret_cast<RENDER_SURFACE_STATE *>(ptrOffset(pKernel->getSurfaceStateHeap(),
auto surfaceState = reinterpret_cast<RENDER_SURFACE_STATE *>(ptrOffset(pKernel->getSurfaceStateHeap(rootDeviceIndex),
pKernelInfo->kernelArgInfo[0].offsetHeap));
EXPECT_TRUE(Image::isDepthFormat(image->getImageFormat()));
@ -627,7 +627,7 @@ HWTEST_F(ImageSetArgTest, givenMcsAllocationAndRenderCompressionWhenSetArgOnMult
retVal = clSetKernelArg(pKernel, 0, sizeof(memObj), &memObj);
ASSERT_EQ(CL_SUCCESS, retVal);
auto surfaceState = reinterpret_cast<const RENDER_SURFACE_STATE *>(ptrOffset(pKernel->getSurfaceStateHeap(),
auto surfaceState = reinterpret_cast<const RENDER_SURFACE_STATE *>(ptrOffset(pKernel->getSurfaceStateHeap(rootDeviceIndex),
pKernelInfo->kernelArgInfo[0].offsetHeap));
EXPECT_TRUE(surfaceState->getMultisampledSurfaceStorageFormat() ==
@ -657,7 +657,7 @@ HWTEST_F(ImageSetArgTest, givenDepthFormatAndRenderCompressionWhenSetArgOnMultis
retVal = clSetKernelArg(pKernel, 0, sizeof(memObj), &memObj);
ASSERT_EQ(CL_SUCCESS, retVal);
auto surfaceState = reinterpret_cast<const RENDER_SURFACE_STATE *>(ptrOffset(pKernel->getSurfaceStateHeap(),
auto surfaceState = reinterpret_cast<const RENDER_SURFACE_STATE *>(ptrOffset(pKernel->getSurfaceStateHeap(rootDeviceIndex),
pKernelInfo->kernelArgInfo[0].offsetHeap));
EXPECT_TRUE(Image::isDepthFormat(image->getImageFormat()));
@ -692,7 +692,7 @@ HWTEST_F(ImageSetArgTest, givenMcsAllocationWhenSetArgIsCalledWithUnifiedAuxCapa
retVal = clSetKernelArg(pKernel, 0, sizeof(memObj), &memObj);
ASSERT_EQ(CL_SUCCESS, retVal);
auto surfaceState = reinterpret_cast<const RENDER_SURFACE_STATE *>(ptrOffset(pKernel->getSurfaceStateHeap(),
auto surfaceState = reinterpret_cast<const RENDER_SURFACE_STATE *>(ptrOffset(pKernel->getSurfaceStateHeap(rootDeviceIndex),
pKernelInfo->kernelArgInfo[0].offsetHeap));
EXPECT_TRUE(surfaceState->getAuxiliarySurfaceMode() == AUXILIARY_SURFACE_MODE::AUXILIARY_SURFACE_MODE_AUX_CCS_E);
@ -723,7 +723,7 @@ HWTEST_F(ImageSetArgTest, givenMcsAllocationWhenSetArgIsCalledWithUnifiedAuxCapa
retVal = clSetKernelArg(pKernel, 0, sizeof(memObj), &memObj);
ASSERT_EQ(CL_SUCCESS, retVal);
auto surfaceState = reinterpret_cast<const RENDER_SURFACE_STATE *>(ptrOffset(pKernel->getSurfaceStateHeap(),
auto surfaceState = reinterpret_cast<const RENDER_SURFACE_STATE *>(ptrOffset(pKernel->getSurfaceStateHeap(rootDeviceIndex),
pKernelInfo->kernelArgInfo[0].offsetHeap));
EXPECT_NE(0u, surfaceState->getAuxiliarySurfaceBaseAddress());
@ -758,7 +758,7 @@ HWTEST_F(ImageSetArgTest, givenMcsAllocationWhenSetArgIsCalledWithUnifiedAuxCapa
retVal = clSetKernelArg(pKernel, 0, sizeof(memObj), &memObj);
ASSERT_EQ(CL_SUCCESS, retVal);
auto surfaceState = reinterpret_cast<const RENDER_SURFACE_STATE *>(ptrOffset(pKernel->getSurfaceStateHeap(),
auto surfaceState = reinterpret_cast<const RENDER_SURFACE_STATE *>(ptrOffset(pKernel->getSurfaceStateHeap(rootDeviceIndex),
pKernelInfo->kernelArgInfo[0].offsetHeap));
EXPECT_EQ(pitchValue, surfaceState->getAuxiliarySurfacePitch());
@ -793,7 +793,7 @@ HWTEST_F(ImageSetArgTest, GivenImageFrom1dBufferWhenSettingKernelArgThenProperti
ASSERT_EQ(CL_SUCCESS, retVal);
auto surfaceState = reinterpret_cast<const RENDER_SURFACE_STATE *>(
ptrOffset(pKernel->getSurfaceStateHeap(),
ptrOffset(pKernel->getSurfaceStateHeap(rootDeviceIndex),
pKernelInfo->kernelArgInfo[0].offsetHeap));
auto surfaceAddress = surfaceState->getSurfaceBaseAddress();
auto image = castToObject<Image>(imageFromBuffer);
@ -837,7 +837,7 @@ HWTEST_F(ImageSetArgTest, GivenImageWithClLuminanceFormatWhenSettingKernelArgThe
ASSERT_EQ(CL_SUCCESS, retVal);
auto surfaceState = reinterpret_cast<const RENDER_SURFACE_STATE *>(
ptrOffset(pKernel->getSurfaceStateHeap(),
ptrOffset(pKernel->getSurfaceStateHeap(rootDeviceIndex),
pKernelInfo->kernelArgInfo[0].offsetHeap));
//for CL_LUMINANCE format we override channels to RED to be spec complaint.
EXPECT_EQ(RENDER_SURFACE_STATE::SHADER_CHANNEL_SELECT_RED, surfaceState->getShaderChannelSelectRed());
@ -963,7 +963,7 @@ HWTEST_F(ImageMediaBlockSetArgTest, WhenSettingKernelArgImageThenPropertiesAreCo
ASSERT_EQ(CL_SUCCESS, retVal);
auto surfaceState = reinterpret_cast<const RENDER_SURFACE_STATE *>(
ptrOffset(pKernel->getSurfaceStateHeap(),
ptrOffset(pKernel->getSurfaceStateHeap(rootDeviceIndex),
pKernelInfo->kernelArgInfo[0].offsetHeap));
size_t rPitch = srcImage->getImageDesc().image_row_pitch;

View File

@ -544,7 +544,7 @@ TEST_F(MemoryAllocatorTest, givenStatelessKernelWithPrintfWhenPrintfSurfaceIsCre
EXPECT_EQ(allocationAddress, *(uintptr_t *)printfPatchAddress);
EXPECT_EQ(0u, kernel.mockKernel->getSurfaceStateHeapSize());
EXPECT_EQ(0u, kernel.mockKernel->getSurfaceStateHeapSize(rootDeviceIndex));
delete printfHandler;
}
@ -575,11 +575,11 @@ HWTEST_F(MemoryAllocatorTest, givenStatefulKernelWithPrintfWhenPrintfSurfaceIsCr
auto printfAllocation = printfHandler->getSurface();
auto allocationAddress = printfAllocation->getGpuAddress();
EXPECT_NE(0u, kernel.mockKernel->getSurfaceStateHeapSize());
EXPECT_NE(0u, kernel.mockKernel->getSurfaceStateHeapSize(device->getRootDeviceIndex()));
typedef typename FamilyType::RENDER_SURFACE_STATE RENDER_SURFACE_STATE;
auto surfaceState = reinterpret_cast<const RENDER_SURFACE_STATE *>(
ptrOffset(kernel.mockKernel->getSurfaceStateHeap(),
ptrOffset(kernel.mockKernel->getSurfaceStateHeap(device->getRootDeviceIndex()),
kernel.mockKernel->getKernelInfo().patchInfo.pAllocateStatelessPrintfSurface->SurfaceStateHeapOffset));
auto surfaceAddress = surfaceState->getSurfaceBaseAddress();

View File

@ -43,7 +43,6 @@ class MockKernel : public Kernel {
using Kernel::numberOfBindingTableStates;
using Kernel::patchBufferOffset;
using Kernel::patchWithImplicitSurface;
using Kernel::sshLocalSize;
using Kernel::svmAllocationsRequireCacheFlush;
using Kernel::threadArbitrationPolicy;
using Kernel::unifiedMemoryControls;
@ -181,15 +180,15 @@ class MockKernel : public Kernel {
kernelDeviceInfos[rootDeviceIndex].crossThreadDataSize = static_cast<uint32_t>(mockCrossThreadData.size());
}
void setSshLocal(const void *sshPattern, uint32_t newSshSize) {
sshLocalSize = newSshSize;
void setSshLocal(const void *sshPattern, uint32_t newSshSize, uint32_t rootDeviceIndex) {
kernelDeviceInfos[rootDeviceIndex].sshLocalSize = newSshSize;
if (newSshSize == 0) {
pSshLocal.reset(nullptr);
kernelDeviceInfos[rootDeviceIndex].pSshLocal.reset(nullptr);
} else {
pSshLocal = std::make_unique<char[]>(newSshSize);
kernelDeviceInfos[rootDeviceIndex].pSshLocal = std::make_unique<char[]>(newSshSize);
if (sshPattern) {
memcpy_s(pSshLocal.get(), newSshSize, sshPattern, newSshSize);
memcpy_s(kernelDeviceInfos[rootDeviceIndex].pSshLocal.get(), newSshSize, sshPattern, newSshSize);
}
}
}
@ -291,7 +290,7 @@ class MockKernelWithInternals {
mockProgram = new MockProgram(context, false, deviceVector);
mockKernel = new MockKernel(mockProgram, kernelInfo);
mockKernel->setCrossThreadData(&crossThreadData, sizeof(crossThreadData));
mockKernel->setSshLocal(&sshLocal, sizeof(sshLocal));
mockKernel->setSshLocal(&sshLocal, sizeof(sshLocal), deviceArg.getRootDeviceIndex());
if (addDefaultArg) {
defaultKernelArguments.resize(2);
@ -358,10 +357,9 @@ class MockKernelWithInternals {
class MockParentKernel : public Kernel {
public:
using Kernel::auxTranslationRequired;
using Kernel::kernelDeviceInfos;
using Kernel::kernelInfo;
using Kernel::patchBlocksCurbeWithConstantValues;
using Kernel::pSshLocal;
using Kernel::sshLocalSize;
static MockParentKernel *create(Context &context, bool addChildSimdSize = false, bool addChildGlobalMemory = false, bool addChildConstantMemory = false, bool addPrintfForParent = true, bool addPrintfForBlock = true) {
auto clDevice = context.getDevice(0);