feature: bindless addressing for images

- program surface states for redescribed images correctly. Image copies
to/from memory use redescribed surface states,
- refactor state base address programming - program address and size
together, set max size at the beginning due to lack of Enable flag
- set GpuBase in WddmAllocation when external heap is used
- return max ssh required size from kernelInfo or based on stateful args

Related-To: NEO-7063

Signed-off-by: Mateusz Hoppe <mateusz.hoppe@intel.com>
This commit is contained in:
Mateusz Hoppe
2023-08-17 08:17:57 +00:00
committed by Compute-Runtime-Automation
parent 94bfc3418b
commit 8435160db4
18 changed files with 338 additions and 27 deletions

View File

@@ -549,7 +549,35 @@ ze_result_t KernelImp::setArgRedescribedImage(uint32_t argIndex, ze_image_handle
}
const auto image = Image::fromHandle(argVal);
image->copyRedescribedSurfaceStateToSSH(surfaceStateHeapData.get(), arg.bindful);
if (kernelImmData->getDescriptor().kernelAttributes.imageAddressingMode == NEO::KernelDescriptor::Bindless) {
NEO::BindlessHeapsHelper *bindlessHeapsHelper = this->module->getDevice()->getNEODevice()->getBindlessHeapsHelper();
auto &gfxCoreHelper = this->module->getDevice()->getGfxCoreHelper();
const auto surfaceStateSize = gfxCoreHelper.getRenderSurfaceStateSize();
if (bindlessHeapsHelper) {
if (!this->module->getDevice()->getNEODevice()->getMemoryManager()->allocateBindlessSlot(image->getAllocation())) {
return ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY;
}
auto ssInHeap = image->getAllocation()->getBindlessInfo();
auto patchLocation = ptrOffset(getCrossThreadData(), arg.bindless);
// redescribed image's surface state is after image's state
auto bindlessSlotOffset = ssInHeap.surfaceStateOffset + surfaceStateSize;
auto patchValue = gfxCoreHelper.getBindlessSurfaceExtendedMessageDescriptorValue(static_cast<uint32_t>(bindlessSlotOffset));
patchWithRequiredSize(const_cast<uint8_t *>(patchLocation), sizeof(patchValue), patchValue);
image->copyRedescribedSurfaceStateToSSH(ptrOffset(ssInHeap.ssPtr, surfaceStateSize), 0u);
this->residencyContainer.push_back(ssInHeap.heapAllocation);
} else {
auto ssPtr = ptrOffset(surfaceStateHeapData.get(), getSurfaceStateIndexForBindlessOffset(arg.bindless) * surfaceStateSize);
image->copyRedescribedSurfaceStateToSSH(ssPtr, 0u);
}
} else {
image->copyRedescribedSurfaceStateToSSH(surfaceStateHeapData.get(), arg.bindful);
}
residencyContainer[argIndex] = image->getAllocation();
return ZE_RESULT_SUCCESS;

View File

@@ -2433,8 +2433,17 @@ struct MyMockImage : public WhiteBox<::L0::ImageCoreFamily<gfxCoreFamily>> {
passedSurfaceStateHeap = surfaceStateHeap;
passedSurfaceStateOffset = surfaceStateOffset;
}
// Test hook: only records the heap pointer and offset it was called with so tests can inspect them.
void copyRedescribedSurfaceStateToSSH(void *surfaceStateHeap, const uint32_t surfaceStateOffset) override {
    passedRedescribedSurfaceStateOffset = surfaceStateOffset;
    passedRedescribedSurfaceStateHeap = surfaceStateHeap;
}
void *passedSurfaceStateHeap = nullptr;
uint32_t passedSurfaceStateOffset = 0;
void *passedRedescribedSurfaceStateHeap = nullptr;
uint32_t passedRedescribedSurfaceStateOffset = 0;
};
HWTEST2_F(SetKernelArg, givenImageAndBindlessKernelWhenSetArgImageThenCopySurfaceStateToSSHCalledWithCorrectArgs, ImageSupport) {
@@ -2500,6 +2509,110 @@ HWTEST2_F(SetKernelArg, givenBindlessKernelAndNoAvailableSpaceOnSshWhenSetArgIma
EXPECT_EQ(nullptr, bindlessInfo.heapAllocation);
}
// With a bindless heaps helper created for the device, setArgRedescribedImage is expected to copy the
// redescribed surface state into the image's bindless slot, one surface state past ssPtr, at offset 0.
HWTEST2_F(SetKernelArg, givenImageBindlessKernelAndGlobalBindlessHelperWhenSetArgRedescribedImageCalledThenCopySurfaceStateToSSHCalledWithCorrectArgs, ImageSupport) {
    createKernel();

    auto &rootDeviceEnvironment = *neoDevice->getExecutionEnvironment()->rootDeviceEnvironments[neoDevice->getRootDeviceIndex()];
    rootDeviceEnvironment.createBindlessHeapsHelper(neoDevice->getMemoryManager(),
                                                    neoDevice->getNumGenericSubDevices() > 1,
                                                    neoDevice->getRootDeviceIndex(),
                                                    neoDevice->getDeviceBitfield());

    // Turn explicit argument 3 into a bindless image argument on a bindless-image kernel.
    auto &kernelDescriptor = kernel->kernelImmData->getDescriptor();
    auto &redescribedImageArg = const_cast<NEO::ArgDescImage &>(kernelDescriptor.payloadMappings.explicitArgs[3].template as<NEO::ArgDescImage>());
    const_cast<NEO::KernelDescriptor::AddressingMode &>(kernelDescriptor.kernelAttributes.imageAddressingMode) = NEO::KernelDescriptor::Bindless;
    redescribedImageArg.bindless = 0x0;
    redescribedImageArg.bindful = undefined<SurfaceStateHeapOffset>;

    ze_image_desc_t imageDesc = {};
    imageDesc.stype = ZE_STRUCTURE_TYPE_IMAGE_DESC;

    auto mockImage = std::make_unique<MyMockImage<gfxCoreFamily>>();
    auto result = mockImage->initialize(device, &imageDesc);
    auto imageHandle = mockImage->toHandle();
    ASSERT_EQ(ZE_RESULT_SUCCESS, result);

    result = kernel->setArgRedescribedImage(3, imageHandle);
    EXPECT_EQ(ZE_RESULT_SUCCESS, result);

    auto surfaceStateSize = neoDevice->getGfxCoreHelper().getRenderSurfaceStateSize();
    auto bindlessInfo = mockImage->getAllocation()->getBindlessInfo();

    EXPECT_EQ(mockImage->passedRedescribedSurfaceStateHeap, ptrOffset(bindlessInfo.ssPtr, surfaceStateSize));
    EXPECT_EQ(mockImage->passedRedescribedSurfaceStateOffset, 0u);
}
// Builds a mock kernel with a single bindless image argument and checks that setArgRedescribedImage copies
// the redescribed surface state into the kernel's own surface state heap, at the slot mapped to bindless
// offset 0x0, with offset 0.
HWTEST2_F(SetKernelArg, givenImageAndBindlessKernelWhenSetArgRedescribedImageCalledThenCopySurfaceStateToSSHCalledWithCorrectArgs, ImageSupport) {
    Mock<Module> mockModule(this->device, nullptr);
    Mock<KernelImp> mockKernel;
    mockKernel.module = &mockModule;

    mockKernel.descriptor.kernelAttributes.bufferAddressingMode = NEO::KernelDescriptor::BindlessAndStateless;
    mockKernel.descriptor.kernelAttributes.imageAddressingMode = NEO::KernelDescriptor::Bindless;

    // Single image argument: bindless offset 0x0, no bindful offset.
    auto imageArgDescriptor = NEO::ArgDescriptor(NEO::ArgDescriptor::ArgTImage);
    auto &imageArg = imageArgDescriptor.as<NEO::ArgDescImage>();
    imageArg = NEO::ArgDescImage();
    imageArg.bindful = NEO::undefined<NEO::SurfaceStateHeapOffset>;
    imageArg.bindless = 0x0;

    mockKernel.crossThreadData = std::make_unique<uint8_t[]>(4 * sizeof(uint64_t));
    mockKernel.crossThreadDataSize = 4 * sizeof(uint64_t);
    mockKernel.descriptor.payloadMappings.explicitArgs.push_back(imageArgDescriptor);

    auto surfaceStateSize = neoDevice->getGfxCoreHelper().getRenderSurfaceStateSize();
    mockKernel.surfaceStateHeapData = std::make_unique<uint8_t[]>(surfaceStateSize);
    mockKernel.descriptor.initBindlessOffsetToSurfaceState();
    mockKernel.residencyContainer.resize(1);

    ze_image_desc_t imageDesc = {};
    imageDesc.stype = ZE_STRUCTURE_TYPE_IMAGE_DESC;

    auto mockImage = std::make_unique<MyMockImage<gfxCoreFamily>>();
    auto result = mockImage->initialize(device, &imageDesc);
    auto imageHandle = mockImage->toHandle();
    ASSERT_EQ(ZE_RESULT_SUCCESS, result);

    result = mockKernel.setArgRedescribedImage(0, imageHandle);
    EXPECT_EQ(ZE_RESULT_SUCCESS, result);

    // Expected destination: the heap slot that the descriptor maps to bindless offset 0x0.
    const auto &offsetToSurfaceState = mockKernel.kernelImmData->getDescriptor().getBindlessOffsetToSurfaceState();
    auto expectedHeapSlot = ptrOffset(mockKernel.surfaceStateHeapData.get(), offsetToSurfaceState.find(0x0)->second * surfaceStateSize);

    EXPECT_EQ(mockImage->passedRedescribedSurfaceStateHeap, expectedHeapSlot);
    EXPECT_EQ(mockImage->passedRedescribedSurfaceStateOffset, 0u);
}
// When no bindless slot can be obtained for the image, setArgRedescribedImage is expected to return
// ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY and leave the allocation's bindless info empty.
HWTEST2_F(SetKernelArg, givenBindlessKernelAndNoAvailableSpaceOnSshWhenSetArgRedescribedImageCalledThenOutOfMemoryErrorReturned, ImageSupport) {
    createKernel();

    auto memoryManagerMock = static_cast<MockMemoryManager *>(neoDevice->getMemoryManager());
    auto heapsHelperMock = new MockBindlesHeapsHelper(memoryManagerMock,
                                                      neoDevice->getNumGenericSubDevices() > 1,
                                                      neoDevice->getRootDeviceIndex(),
                                                      neoDevice->getDeviceBitfield());
    // The root device environment takes ownership of the helper.
    auto &rootDeviceEnvironment = *neoDevice->getExecutionEnvironment()->rootDeviceEnvironments[neoDevice->getRootDeviceIndex()];
    rootDeviceEnvironment.bindlessHeapsHelper.reset(heapsHelperMock);

    // Turn explicit argument 3 into a bindless image argument on a bindless-image kernel.
    auto &kernelDescriptor = kernel->kernelImmData->getDescriptor();
    auto &redescribedImageArg = const_cast<NEO::ArgDescImage &>(kernelDescriptor.payloadMappings.explicitArgs[3].template as<NEO::ArgDescImage>());
    const_cast<NEO::KernelDescriptor::AddressingMode &>(kernelDescriptor.kernelAttributes.imageAddressingMode) = NEO::KernelDescriptor::Bindless;
    redescribedImageArg.bindless = 0x0;
    redescribedImageArg.bindful = undefined<SurfaceStateHeapOffset>;

    ze_image_desc_t imageDesc = {};
    imageDesc.stype = ZE_STRUCTURE_TYPE_IMAGE_DESC;

    auto mockImage = std::make_unique<MyMockImage<gfxCoreFamily>>();
    auto result = mockImage->initialize(device, &imageDesc);
    auto imageHandle = mockImage->toHandle();
    ASSERT_EQ(ZE_RESULT_SUCCESS, result);

    // Set the mock's allocation-failure flags and take all space currently available in the global SSH.
    memoryManagerMock->failInDevicePool = true;
    memoryManagerMock->failAllocateSystemMemory = true;
    auto globalSsh = heapsHelperMock->globalSsh;
    globalSsh->getSpace(globalSsh->getAvailableSpace());

    result = kernel->setArgRedescribedImage(3, imageHandle);
    EXPECT_EQ(ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY, result);

    auto bindlessInfo = mockImage->getAllocation()->getBindlessInfo();
    EXPECT_EQ(nullptr, bindlessInfo.ssPtr);
    EXPECT_EQ(nullptr, bindlessInfo.heapAllocation);
}
HWTEST2_F(SetKernelArg, givenBindlessKernelAndNoAvailableSpaceOnSshWhenSetArgBufferCalledThenOutOfMemoryErrorReturned, MatchAny) {
auto mockMemManager = static_cast<MockMemoryManager *>(neoDevice->getMemoryManager());

View File

@@ -751,7 +751,7 @@ size_t EncodeDispatchKernel<Family>::getSizeRequiredSsh(const KernelInfo &kernel
size_t requiredSshSize = kernelInfo.heapInfo.surfaceStateHeapSize;
bool isBindlessKernel = NEO::KernelDescriptor ::isBindlessAddressingKernel(kernelInfo.kernelDescriptor);
if (isBindlessKernel) {
requiredSshSize = kernelInfo.kernelDescriptor.kernelAttributes.numArgsStateful * sizeof(typename Family::RENDER_SURFACE_STATE);
requiredSshSize = std::max(requiredSshSize, kernelInfo.kernelDescriptor.kernelAttributes.numArgsStateful * sizeof(typename Family::RENDER_SURFACE_STATE));
}
requiredSshSize = alignUp(requiredSshSize, EncodeDispatchKernel<Family>::getDefaultSshAlignment());
return requiredSshSize;

View File

@@ -7,6 +7,9 @@
#include "shared/source/helpers/bindless_heaps_helper.h"
#include "shared/source/execution_environment/execution_environment.h"
#include "shared/source/execution_environment/root_device_environment.h"
#include "shared/source/helpers/gfx_core_helper.h"
#include "shared/source/helpers/string.h"
#include "shared/source/indirect_heap/indirect_heap.h"
#include "shared/source/memory_manager/allocation_properties.h"
@@ -18,7 +21,13 @@ constexpr size_t globalSshAllocationSize = 4 * MemoryConstants::pageSize64k;
constexpr size_t borderColorAlphaOffset = alignUp(4 * sizeof(float), MemoryConstants::cacheLineSize);
using BindlesHeapType = BindlessHeapsHelper::BindlesHeapType;
BindlessHeapsHelper::BindlessHeapsHelper(MemoryManager *memManager, bool isMultiOsContextCapable, const uint32_t rootDeviceIndex, DeviceBitfield deviceBitfield) : memManager(memManager), isMultiOsContextCapable(isMultiOsContextCapable), rootDeviceIndex(rootDeviceIndex), deviceBitfield(deviceBitfield) {
BindlessHeapsHelper::BindlessHeapsHelper(MemoryManager *memManager, bool isMultiOsContextCapable,
const uint32_t rootDeviceIndex, DeviceBitfield deviceBitfield) : surfaceStateSize(memManager->peekExecutionEnvironment().rootDeviceEnvironments[rootDeviceIndex]->getHelper<GfxCoreHelper>().getRenderSurfaceStateSize()),
memManager(memManager),
isMultiOsContextCapable(isMultiOsContextCapable),
rootDeviceIndex(rootDeviceIndex),
deviceBitfield(deviceBitfield) {
for (auto heapType = 0; heapType < BindlesHeapType::NUM_HEAP_TYPES; heapType++) {
auto allocInFrontWindow = heapType != BindlesHeapType::GLOBAL_DSH;
auto heapAllocation = getHeapAllocation(MemoryConstants::pageSize64k, MemoryConstants::pageSize64k, allocInFrontWindow);
@@ -58,9 +67,11 @@ SurfaceStateInHeapInfo BindlessHeapsHelper::allocateSSInHeap(size_t ssSize, Grap
std::lock_guard<std::mutex> autolock(this->mtx);
if (heapType == BindlesHeapType::GLOBAL_SSH) {
if (surfaceStateInHeapVectorReuse.size()) {
SurfaceStateInHeapInfo surfaceStateFromVector = surfaceStateInHeapVectorReuse.back();
surfaceStateInHeapVectorReuse.pop_back();
int index = getReusedSshVectorIndex(ssSize);
if (surfaceStateInHeapVectorReuse[index].size()) {
SurfaceStateInHeapInfo surfaceStateFromVector = surfaceStateInHeapVectorReuse[index].back();
surfaceStateInHeapVectorReuse[index].pop_back();
return surfaceStateFromVector;
}
}
@@ -71,7 +82,7 @@ SurfaceStateInHeapInfo BindlessHeapsHelper::allocateSSInHeap(size_t ssSize, Grap
memset(ptrInHeap, 0, ssSize);
auto bindlessOffset = heap->getGraphicsAllocation()->getGpuAddress() - heap->getGraphicsAllocation()->getGpuBaseAddress() + heap->getUsed() - ssSize;
bindlesInfo = SurfaceStateInHeapInfo{heap->getGraphicsAllocation(), bindlessOffset, ptrInHeap};
bindlesInfo = SurfaceStateInHeapInfo{heap->getGraphicsAllocation(), bindlessOffset, ptrInHeap, ssSize};
}
return bindlesInfo;
@@ -119,9 +130,11 @@ bool BindlessHeapsHelper::growHeap(BindlesHeapType heapType) {
void BindlessHeapsHelper::placeSSAllocationInReuseVectorOnFreeMemory(GraphicsAllocation *gfxAllocation) {
auto ssAllocatedInfo = gfxAllocation->getBindlessInfo();
if (ssAllocatedInfo.heapAllocation != nullptr) {
std::lock_guard<std::mutex> autolock(this->mtx);
surfaceStateInHeapVectorReuse.push_back(std::move(ssAllocatedInfo));
int index = getReusedSshVectorIndex(ssAllocatedInfo.ssSize);
surfaceStateInHeapVectorReuse[index].push_back(std::move(ssAllocatedInfo));
}
return;
}

View File

@@ -46,7 +46,18 @@ class BindlessHeapsHelper {
return globalBindlessDsh;
}
// Maps a slot size to the index of the matching reuse vector:
// 1 for a slot of two surface states, 0 for a slot of exactly one.
// Any other size trips UNRECOVERABLE_IF.
int getReusedSshVectorIndex(size_t ssSize) {
    if (ssSize == 2 * surfaceStateSize) {
        return 1;
    }
    UNRECOVERABLE_IF(ssSize != surfaceStateSize);
    return 0;
}
protected:
const size_t surfaceStateSize;
bool growHeap(BindlesHeapType heapType);
MemoryManager *memManager = nullptr;
bool isMultiOsContextCapable = false;
@@ -54,7 +65,7 @@ class BindlessHeapsHelper {
std::unique_ptr<IndirectHeap> surfaceStateHeaps[BindlesHeapType::NUM_HEAP_TYPES];
GraphicsAllocation *borderColorStates;
std::vector<GraphicsAllocation *> ssHeapsAllocations;
std::vector<SurfaceStateInHeapInfo> surfaceStateInHeapVectorReuse;
std::vector<SurfaceStateInHeapInfo> surfaceStateInHeapVectorReuse[2];
std::mutex mtx;
DeviceBitfield deviceBitfield;
bool globalBindlessDsh = false;

View File

@@ -35,7 +35,6 @@ void StateBaseAddressHelper<GfxFamily>::programStateBaseAddress(
StateBaseAddressHelperArgs<GfxFamily> &args) {
*args.stateBaseAddressCmd = GfxFamily::cmdInitStateBaseAddress;
const auto surfaceStateCount = getMaxBindlessSurfaceStates();
args.stateBaseAddressCmd->setBindlessSurfaceStateSize(surfaceStateCount);
@@ -73,6 +72,7 @@ void StateBaseAddressHelper<GfxFamily>::programStateBaseAddress(
args.stateBaseAddressCmd->setBindlessSurfaceStateBaseAddressModifyEnable(true);
args.stateBaseAddressCmd->setBindlessSurfaceStateBaseAddress(args.globalHeapsBaseAddress);
args.stateBaseAddressCmd->setBindlessSurfaceStateSize(surfaceStateCount);
} else {
if (args.dsh) {
args.stateBaseAddressCmd->setDynamicStateBaseAddressModifyEnable(true);

View File

@@ -18,6 +18,8 @@ void StateBaseAddressHelper<GfxFamily>::appendStateBaseAddressParameters(
if (args.bindlessSurfaceStateBaseAddress != 0) {
args.stateBaseAddressCmd->setBindlessSurfaceStateBaseAddress(args.bindlessSurfaceStateBaseAddress);
args.stateBaseAddressCmd->setBindlessSurfaceStateBaseAddressModifyEnable(true);
const auto surfaceStateCount = getMaxBindlessSurfaceStates();
args.stateBaseAddressCmd->setBindlessSurfaceStateSize(surfaceStateCount);
} else if (args.ssh) {
args.stateBaseAddressCmd->setBindlessSurfaceStateBaseAddressModifyEnable(true);
args.stateBaseAddressCmd->setBindlessSurfaceStateBaseAddress(args.ssh->getHeapGpuBase());

View File

@@ -17,6 +17,8 @@ void StateBaseAddressHelper<GfxFamily>::appendStateBaseAddressParameters(
if (args.bindlessSurfaceStateBaseAddress != 0) {
args.stateBaseAddressCmd->setBindlessSurfaceStateBaseAddress(args.bindlessSurfaceStateBaseAddress);
args.stateBaseAddressCmd->setBindlessSurfaceStateBaseAddressModifyEnable(true);
const auto surfaceStateCount = getMaxBindlessSurfaceStates();
args.stateBaseAddressCmd->setBindlessSurfaceStateSize(surfaceStateCount);
} else if (args.ssh) {
args.stateBaseAddressCmd->setBindlessSurfaceStateBaseAddressModifyEnable(true);
args.stateBaseAddressCmd->setBindlessSurfaceStateBaseAddress(args.ssh->getHeapGpuBase());

View File

@@ -51,6 +51,8 @@ void StateBaseAddressHelper<GfxFamily>::appendStateBaseAddressParameters(
if (args.bindlessSurfaceStateBaseAddress != 0) {
args.stateBaseAddressCmd->setBindlessSurfaceStateBaseAddress(args.bindlessSurfaceStateBaseAddress);
args.stateBaseAddressCmd->setBindlessSurfaceStateBaseAddressModifyEnable(true);
const auto surfaceStateCount = getMaxBindlessSurfaceStates();
args.stateBaseAddressCmd->setBindlessSurfaceStateSize(surfaceStateCount);
} else if (args.ssh) {
args.stateBaseAddressCmd->setBindlessSurfaceStateBaseAddress(args.ssh->getHeapGpuBase());
args.stateBaseAddressCmd->setBindlessSurfaceStateBaseAddressModifyEnable(true);

View File

@@ -50,6 +50,7 @@ struct SurfaceStateInHeapInfo {
GraphicsAllocation *heapAllocation;
uint64_t surfaceStateOffset;
void *ssPtr;
size_t ssSize;
};
class GraphicsAllocation : public IDNode<GraphicsAllocation> {

View File

@@ -1011,7 +1011,9 @@ bool MemoryManager::allocateBindlessSlot(GraphicsAllocation *allocation) {
if (bindlessHelper && allocation->getBindlessOffset() == std::numeric_limits<uint64_t>::max()) {
auto &gfxCoreHelper = peekExecutionEnvironment().rootDeviceEnvironments[allocation->getRootDeviceIndex()]->getHelper<GfxCoreHelper>();
auto surfaceStateSize = gfxCoreHelper.getRenderSurfaceStateSize();
const auto isImage = allocation->getAllocationType() == AllocationType::IMAGE || allocation->getAllocationType() == AllocationType::SHARED_IMAGE;
auto surfStateCount = isImage ? 2 : 1;
auto surfaceStateSize = surfStateCount * gfxCoreHelper.getRenderSurfaceStateSize();
auto surfaceStateInfo = bindlessHelper->allocateSSInHeap(surfaceStateSize, allocation, NEO::BindlessHeapsHelper::GLOBAL_SSH);
if (surfaceStateInfo.heapAllocation == nullptr) {

View File

@@ -1367,6 +1367,9 @@ GraphicsAllocation *WddmMemoryManager::allocateGraphicsMemoryInDevicePool(const
if (heapAssigner.useInternal32BitHeap(allocationData.type)) {
auto gmmHelper = getGmmHelper(wddmAllocation->getRootDeviceIndex());
wddmAllocation->setGpuBaseAddress(gmmHelper->canonize(getInternalHeapBaseAddress(wddmAllocation->getRootDeviceIndex(), true)));
} else if (heapAssigner.useExternal32BitHeap(allocationData.type)) {
auto gmmHelper = getGmmHelper(wddmAllocation->getRootDeviceIndex());
wddmAllocation->setGpuBaseAddress(gmmHelper->canonize(getExternalHeapBaseAddress(wddmAllocation->getRootDeviceIndex(), true)));
}
status = AllocationStatus::Success;

View File

@@ -38,6 +38,7 @@ class MockBindlesHeapsHelper : public BindlessHeapsHelper {
using BaseClass::ssHeapsAllocations;
using BaseClass::surfaceStateHeaps;
using BaseClass::surfaceStateInHeapVectorReuse;
using BaseClass::surfaceStateSize;
IndirectHeap *specialSsh;
IndirectHeap *globalSsh;

View File

@@ -272,7 +272,7 @@ void WddmMock::virtualFree(void *ptr, size_t size) {
void WddmMock::releaseReservedAddress(void *reservedAddress) {
releaseReservedAddressResult.called++;
if (reservedAddress != nullptr) {
std::set<void *>::iterator it;
std::unordered_multiset<void *>::iterator it;
it = reservedAddresses.find(reservedAddress);
EXPECT_NE(reservedAddresses.end(), it);
reservedAddresses.erase(it);
@@ -284,9 +284,8 @@ bool WddmMock::reserveValidAddressRange(size_t size, void *&reservedMem) {
reserveValidAddressRangeResult.called++;
bool ret = Wddm::reserveValidAddressRange(size, reservedMem);
if (reservedMem != nullptr) {
std::set<void *>::iterator it;
std::unordered_multiset<void *>::iterator it;
it = reservedAddresses.find(reservedMem);
EXPECT_EQ(reservedAddresses.end(), it);
reservedAddresses.insert(reservedMem);
}
return ret;

View File

@@ -18,7 +18,7 @@
#include <limits>
#include <memory>
#include <set>
#include <unordered_set>
namespace NEO {
class GraphicsAllocation;
@@ -181,7 +181,7 @@ class WddmMock : public Wddm {
bool callBaseDestroyAllocations = true;
bool failOpenSharedHandle = false;
bool callBaseMapGpuVa = true;
std::set<void *> reservedAddresses;
std::unordered_multiset<void *> reservedAddresses;
uintptr_t virtualAllocAddress = NEO::windowsMinAddress;
bool kmDafEnabled = false;
uint64_t mockPagingFence = 0u;

View File

@@ -111,6 +111,26 @@ TEST_F(BindlessHeapsHelperTests, givenBindlessHeapHelperWhenAllocateSsInHeapThen
EXPECT_EQ(ssInHeapInfo.ssPtr, allocInHeapPtr);
}
// For IMAGE and SHARED_IMAGE allocations the bindless slot is expected to be two surface states in size.
TEST_F(BindlessHeapsHelperTests, givenBindlessHeapHelperWhenAllocateSsInHeapForImageThenTwoBindlessSlotsAreAllocated) {
    auto heapsHelper = std::make_unique<MockBindlesHeapsHelper>(getMemoryManager(), false, rootDeviceIndex, devBitfield);
    auto surfaceStateSize = heapsHelper->surfaceStateSize;
    memManager->mockExecutionEnvironment->rootDeviceEnvironments[rootDeviceIndex]->bindlessHeapsHelper.reset(heapsHelper.release());

    // Tags the allocation with the given type, requests a bindless slot and checks the recorded slot size.
    auto expectDoubleSlot = [&](MockGraphicsAllocation &allocation, AllocationType type) {
        allocation.allocationType = type;
        EXPECT_TRUE(getMemoryManager()->allocateBindlessSlot(&allocation));
        auto bindlessInfo = allocation.getBindlessInfo();
        EXPECT_EQ(surfaceStateSize * 2, bindlessInfo.ssSize);
    };

    MockGraphicsAllocation imageAllocation;
    expectDoubleSlot(imageAllocation, AllocationType::IMAGE);

    MockGraphicsAllocation sharedImageAllocation;
    expectDoubleSlot(sharedImageAllocation, AllocationType::SHARED_IMAGE);
}
TEST_F(BindlessHeapsHelperTests, givenBindlessHeapHelperWhenAllocateSsInHeapTwiceForTheSameAllocationThenTheSameOffsetReturned) {
auto bindlessHeapHelper = std::make_unique<MockBindlesHeapsHelper>(getMemoryManager(), false, rootDeviceIndex, devBitfield);
@@ -213,10 +233,22 @@ TEST_F(BindlessHeapsHelperTests, givenBindlessHeapHelperWhenFreeGraphicsMemoryIs
auto ssInHeapInfo = alloc->getBindlessInfo();
memManager->freeGraphicsMemory(alloc);
EXPECT_EQ(bindlessHeapHelperPtr->surfaceStateInHeapVectorReuse.size(), 1u);
auto ssInHeapInfoFromReuseVector = bindlessHeapHelperPtr->surfaceStateInHeapVectorReuse.front();
EXPECT_EQ(bindlessHeapHelperPtr->surfaceStateInHeapVectorReuse[0].size(), 1u);
auto ssInHeapInfoFromReuseVector = bindlessHeapHelperPtr->surfaceStateInHeapVectorReuse[0].front();
EXPECT_EQ(ssInHeapInfoFromReuseVector.surfaceStateOffset, ssInHeapInfo.surfaceStateOffset);
EXPECT_EQ(ssInHeapInfoFromReuseVector.ssPtr, ssInHeapInfo.ssPtr);
MockGraphicsAllocation *alloc2 = new MockGraphicsAllocation;
alloc2->allocationType = AllocationType::IMAGE;
memManager->allocateBindlessSlot(alloc2);
auto ssInHeapInfo2 = alloc2->getBindlessInfo();
memManager->freeGraphicsMemory(alloc2);
EXPECT_EQ(bindlessHeapHelperPtr->surfaceStateInHeapVectorReuse[1].size(), 1u);
ssInHeapInfoFromReuseVector = bindlessHeapHelperPtr->surfaceStateInHeapVectorReuse[1].front();
EXPECT_EQ(ssInHeapInfoFromReuseVector.surfaceStateOffset, ssInHeapInfo2.surfaceStateOffset);
EXPECT_EQ(ssInHeapInfoFromReuseVector.ssPtr, ssInHeapInfo2.ssPtr);
}
TEST_F(BindlessHeapsHelperTests, givenBindlessHeapHelperWhenAllocatingBindlessSlotTwiceThenNewSlotIsNotAllocatedAndTrueIsReturned) {
@@ -247,13 +279,13 @@ TEST_F(BindlessHeapsHelperTests, givenBindlessHeapHelperPreviousAllocationThenIt
auto ssInHeapInfo = alloc->getBindlessInfo();
memManager->freeGraphicsMemory(alloc);
EXPECT_EQ(bindlessHeapHelperPtr->surfaceStateInHeapVectorReuse.size(), 1u);
EXPECT_EQ(bindlessHeapHelperPtr->surfaceStateInHeapVectorReuse[0].size(), 1u);
MockGraphicsAllocation *alloc2 = new MockGraphicsAllocation;
memManager->allocateBindlessSlot(alloc2);
auto reusedSSinHeapInfo = alloc2->getBindlessInfo();
EXPECT_EQ(bindlessHeapHelperPtr->surfaceStateInHeapVectorReuse.size(), 0u);
EXPECT_EQ(bindlessHeapHelperPtr->surfaceStateInHeapVectorReuse[0].size(), 0u);
EXPECT_EQ(ssInHeapInfo.surfaceStateOffset, reusedSSinHeapInfo.surfaceStateOffset);
EXPECT_EQ(ssInHeapInfo.ssPtr, reusedSSinHeapInfo.ssPtr);
memManager->freeGraphicsMemory(alloc2);

View File

@@ -560,8 +560,56 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, SbaTest,
EXPECT_EQ(0u, sbaCmd.getBindlessSurfaceStateBaseAddress());
EXPECT_FALSE(sbaCmd.getBindlessSurfaceStateBaseAddressModifyEnable());
uint32_t defaultBindlessSurfaceStateSize = StateBaseAddressHelper<FamilyType>::getMaxBindlessSurfaceStates();
EXPECT_EQ(defaultBindlessSurfaceStateSize, sbaCmd.getBindlessSurfaceStateSize());
}
// Programs STATE_BASE_ADDRESS with general-state, indirect-object and instruction bases plus an explicit
// bindless surface state base address, then checks the resulting command fields.
// NOTE(review): the test name says "BindlessSurfaceStateSizeSetToZero", but the final assertion expects
// getMaxBindlessSurfaceStates() - the name looks stale; confirm and rename if so.
HWCMDTEST_F(IGFX_XE_HP_CORE, SbaTest,
givenNoHeapsProvidedAndBindlessBaseSetWhenSBAIsProgrammedThenBindlessSurfaceStateSizeSetToZeroAndBaseAddressSetToPassedValue) {
using STATE_BASE_ADDRESS = typename FamilyType::STATE_BASE_ADDRESS;
auto gmmHelper = pDevice->getGmmHelper();
constexpr uint64_t instructionHeapBase = 0x10000;
constexpr uint64_t internalHeapBase = 0x10000;
constexpr uint64_t generalStateBase = 0x30000;
STATE_BASE_ADDRESS sbaCmd;
// presumably createSbaHelperArgs leaves dsh/ioh/ssh unset ("no heaps provided") - defined outside this view
StateBaseAddressHelperArgs<FamilyType> args = createSbaHelperArgs<FamilyType>(&sbaCmd, gmmHelper);
args.generalStateBaseAddress = generalStateBase;
args.indirectObjectHeapBaseAddress = internalHeapBase;
args.instructionHeapBaseAddress = instructionHeapBase;
args.setGeneralStateBaseAddress = true;
args.setInstructionStateBaseAddress = true;
// Explicit bindless base: the value expected back from the command below.
args.bindlessSurfaceStateBaseAddress = 0x90004000;
StateBaseAddressHelper<FamilyType>::programStateBaseAddress(args);
// Dynamic state and surface state fields are expected to stay unprogrammed.
EXPECT_FALSE(sbaCmd.getDynamicStateBaseAddressModifyEnable());
EXPECT_FALSE(sbaCmd.getDynamicStateBufferSizeModifyEnable());
EXPECT_EQ(0u, sbaCmd.getDynamicStateBaseAddress());
EXPECT_EQ(0u, sbaCmd.getDynamicStateBufferSize());
EXPECT_FALSE(sbaCmd.getSurfaceStateBaseAddressModifyEnable());
EXPECT_EQ(0u, sbaCmd.getSurfaceStateBaseAddress());
// Instruction base and size are expected to be programmed.
EXPECT_TRUE(sbaCmd.getInstructionBaseAddressModifyEnable());
EXPECT_EQ(instructionHeapBase, sbaCmd.getInstructionBaseAddress());
EXPECT_TRUE(sbaCmd.getInstructionBufferSizeModifyEnable());
EXPECT_EQ(MemoryConstants::sizeOf4GBinPageEntities, sbaCmd.getInstructionBufferSize());
// General state: the expected base differs between 64-bit and 32-bit builds.
EXPECT_TRUE(sbaCmd.getGeneralStateBaseAddressModifyEnable());
EXPECT_TRUE(sbaCmd.getGeneralStateBufferSizeModifyEnable());
if constexpr (is64bit) {
EXPECT_EQ(gmmHelper->decanonize(internalHeapBase), sbaCmd.getGeneralStateBaseAddress());
} else {
EXPECT_EQ(generalStateBase, sbaCmd.getGeneralStateBaseAddress());
}
EXPECT_EQ(0xfffffu, sbaCmd.getGeneralStateBufferSize());
// Bindless: base equals the passed value, modify-enable is set, size equals the maximum count.
auto surfaceStateCount = StateBaseAddressHelper<FamilyType>::getMaxBindlessSurfaceStates();
EXPECT_EQ(0x90004000u, sbaCmd.getBindlessSurfaceStateBaseAddress());
EXPECT_TRUE(sbaCmd.getBindlessSurfaceStateBaseAddressModifyEnable());
EXPECT_EQ(surfaceStateCount, sbaCmd.getBindlessSurfaceStateSize());
}
@@ -719,15 +767,22 @@ HWTEST2_F(SbaTest, givenStateBaseAddressPropertiesWhenSettingBindlessSurfaceStat
StateBaseAddressHelperArgs<FamilyType> args = createSbaHelperArgs<FamilyType>(&sbaCmd, gmmHelper, &sbaProperties);
StateBaseAddressHelper<FamilyType>::programStateBaseAddress(args);
EXPECT_EQ(defaultBindlessSurfaceStateSize, sbaCmd.getBindlessSurfaceStateSize());
EXPECT_EQ(0u, sbaCmd.getBindlessSurfaceStateBaseAddress());
EXPECT_FALSE(sbaCmd.getBindlessSurfaceStateBaseAddressModifyEnable());
sbaCmd = FamilyType::cmdInitStateBaseAddress;
args.bindlessSurfaceStateBaseAddress = 0x80004000;
StateBaseAddressHelper<FamilyType>::programStateBaseAddress(args);
EXPECT_EQ(defaultBindlessSurfaceStateSize, sbaCmd.getBindlessSurfaceStateSize());
EXPECT_EQ(0x80004000u, sbaCmd.getBindlessSurfaceStateBaseAddress());
EXPECT_TRUE(sbaCmd.getBindlessSurfaceStateBaseAddressModifyEnable());
sbaProperties.setPropertiesBindingTableSurfaceState(surfaceHeapBase, surfaceHeapSize, surfaceHeapBase, surfaceHeapSize);
sbaCmd = FamilyType::cmdInitStateBaseAddress;
args.bindlessSurfaceStateBaseAddress = 0;
StateBaseAddressHelper<FamilyType>::programStateBaseAddress(args);
EXPECT_EQ(surfaceHeapSize, sbaCmd.getBindlessSurfaceStateSize());

View File

@@ -7,6 +7,7 @@
#include "shared/source/execution_environment/root_device_environment.h"
#include "shared/source/gmm_helper/gmm_helper.h"
#include "shared/source/helpers/bindless_heaps_helper.h"
#include "shared/source/os_interface/device_factory.h"
#include "shared/source/os_interface/os_interface.h"
#include "shared/test/common/fixtures/device_fixture.h"
@@ -18,15 +19,17 @@
namespace NEO {
class WddmMemManagerFixture {
class GlobalBindlessWddmMemManagerFixture {
public:
struct FrontWindowMemManagerMock : public MockWddmMemoryManager {
using MemoryManager::allocate32BitGraphicsMemoryImpl;
FrontWindowMemManagerMock(NEO::ExecutionEnvironment &executionEnvironment) : MockWddmMemoryManager(executionEnvironment) {}
FrontWindowMemManagerMock(bool enable64kbPages, bool enableLocalMemory, NEO::ExecutionEnvironment &executionEnvironment) : MockWddmMemoryManager(enable64kbPages, enableLocalMemory, executionEnvironment) {}
};
void setUp() {
DebugManagerStateRestore dbgRestorer;
DebugManager.flags.UseExternalAllocatorForSshAndDsh.set(true);
executionEnvironment = std::make_unique<ExecutionEnvironment>();
executionEnvironment->prepareRootDeviceEnvironments(1);
@@ -40,13 +43,15 @@ class WddmMemManagerFixture {
}
void tearDown() {
}
std::unique_ptr<FrontWindowMemManagerMock> memManager;
std::unique_ptr<ExecutionEnvironment> executionEnvironment;
std::unique_ptr<FrontWindowMemManagerMock> memManager;
DebugManagerStateRestore dbgRestorer;
};
using WddmFrontWindowPoolAllocatorTests = Test<WddmMemManagerFixture>;
using WddmGlobalBindlessAllocatorTests = Test<GlobalBindlessWddmMemManagerFixture>;
TEST_F(WddmFrontWindowPoolAllocatorTests, givenAllocateInFrontWindowPoolFlagWhenWddmAllocate32BitGraphicsMemoryThenAllocateAtHeapBegining) {
TEST_F(WddmGlobalBindlessAllocatorTests, givenAllocateInFrontWindowPoolFlagWhenWddmAllocate32BitGraphicsMemoryThenAllocateAtHeapBegining) {
AllocationData allocData = {};
allocData.type = AllocationType::BUFFER;
EXPECT_FALSE(GraphicsAllocation::isLockable(allocData.type));
@@ -63,4 +68,46 @@ TEST_F(WddmFrontWindowPoolAllocatorTests, givenAllocateInFrontWindowPoolFlagWhen
}
memManager->freeGraphicsMemory(allocation);
}
// A LINEAR_STREAM allocation requested with use32BitFrontWindow through the preferred-pool path is expected
// to report the canonized external heap base as its GPU base address, matching the helper's global heaps base.
TEST_F(WddmGlobalBindlessAllocatorTests, givenLocalMemoryWhenSurfaceStatesAllocationCreatedThenGpuBaseAddressIsSetToCorrectBaseAddress) {
    MockAllocationProperties allocationProperties(0, true, MemoryConstants::pageSize64k, AllocationType::LINEAR_STREAM);
    allocationProperties.flags.use32BitFrontWindow = true;

    memManager.reset(new FrontWindowMemManagerMock(true, true, *executionEnvironment));
    auto &rootDeviceEnvironment = *executionEnvironment->rootDeviceEnvironments[0];
    rootDeviceEnvironment.createBindlessHeapsHelper(memManager.get(), false, 0, 1);

    auto surfaceStatesAllocation = memManager->allocateGraphicsMemoryInPreferredPool(allocationProperties, nullptr);
    ASSERT_NE(nullptr, surfaceStatesAllocation);

    auto gmmHelper = memManager->getGmmHelper(0);
    auto expectedGpuBase = gmmHelper->canonize(memManager->getExternalHeapBaseAddress(surfaceStatesAllocation->getRootDeviceIndex(), true));
    EXPECT_EQ(expectedGpuBase, surfaceStatesAllocation->getGpuBaseAddress());

    auto bindlessHeapsHelper = rootDeviceEnvironment.getBindlessHeapsHelper();
    ASSERT_NE(nullptr, bindlessHeapsHelper);
    EXPECT_EQ(bindlessHeapsHelper->getGlobalHeapsBase(), surfaceStatesAllocation->getGpuBaseAddress());

    // Free the allocation and drop the helper before the fixture is torn down.
    memManager->freeGraphicsMemory(surfaceStatesAllocation);
    rootDeviceEnvironment.bindlessHeapsHelper.reset();
}
// Same expectation as the preferred-pool variant, but the LINEAR_STREAM allocation is created directly
// through allocateGraphicsMemoryInDevicePool.
TEST_F(WddmGlobalBindlessAllocatorTests, givenLocalMemoryWhenSurfaceStatesAllocationCreatedInDevicePoolThenGpuBaseAddressIsSetToCorrectBaseAddress) {
    AllocationData allocationData = {};
    allocationData.type = AllocationType::LINEAR_STREAM;
    allocationData.size = MemoryConstants::pageSize64k;

    memManager.reset(new FrontWindowMemManagerMock(true, true, *executionEnvironment));
    auto &rootDeviceEnvironment = *executionEnvironment->rootDeviceEnvironments[0];
    rootDeviceEnvironment.createBindlessHeapsHelper(memManager.get(), false, 0, 1);

    MemoryManager::AllocationStatus allocationStatus;
    auto surfaceStatesAllocation = memManager->allocateGraphicsMemoryInDevicePool(allocationData, allocationStatus);
    ASSERT_NE(nullptr, surfaceStatesAllocation);

    auto gmmHelper = memManager->getGmmHelper(0);
    auto expectedGpuBase = gmmHelper->canonize(memManager->getExternalHeapBaseAddress(surfaceStatesAllocation->getRootDeviceIndex(), true));
    EXPECT_EQ(expectedGpuBase, surfaceStatesAllocation->getGpuBaseAddress());

    auto bindlessHeapsHelper = rootDeviceEnvironment.getBindlessHeapsHelper();
    ASSERT_NE(nullptr, bindlessHeapsHelper);
    EXPECT_EQ(bindlessHeapsHelper->getGlobalHeapsBase(), surfaceStatesAllocation->getGpuBaseAddress());

    // Free the allocation and drop the helper before the fixture is torn down.
    memManager->freeGraphicsMemory(surfaceStatesAllocation);
    rootDeviceEnvironment.bindlessHeapsHelper.reset();
}
} // namespace NEO