mirror of
https://github.com/intel/compute-runtime.git
synced 2025-12-19 06:24:51 +08:00
feature: bindless addressing for images
- program surface states for redescribed images correctly; image copies to/from memory use the redescribed surface states - refactor state base address programming - program address and size together, set max size at the beginning due to the lack of an Enable flag - set GpuBase in WddmAllocation when the external heap is used - return max SSH required size from kernelInfo or based on stateful args Related-To: NEO-7063 Signed-off-by: Mateusz Hoppe <mateusz.hoppe@intel.com>
This commit is contained in:
committed by
Compute-Runtime-Automation
parent
94bfc3418b
commit
8435160db4
@@ -549,7 +549,35 @@ ze_result_t KernelImp::setArgRedescribedImage(uint32_t argIndex, ze_image_handle
|
||||
}
|
||||
|
||||
const auto image = Image::fromHandle(argVal);
|
||||
image->copyRedescribedSurfaceStateToSSH(surfaceStateHeapData.get(), arg.bindful);
|
||||
|
||||
if (kernelImmData->getDescriptor().kernelAttributes.imageAddressingMode == NEO::KernelDescriptor::Bindless) {
|
||||
|
||||
NEO::BindlessHeapsHelper *bindlessHeapsHelper = this->module->getDevice()->getNEODevice()->getBindlessHeapsHelper();
|
||||
auto &gfxCoreHelper = this->module->getDevice()->getGfxCoreHelper();
|
||||
const auto surfaceStateSize = gfxCoreHelper.getRenderSurfaceStateSize();
|
||||
if (bindlessHeapsHelper) {
|
||||
|
||||
if (!this->module->getDevice()->getNEODevice()->getMemoryManager()->allocateBindlessSlot(image->getAllocation())) {
|
||||
return ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY;
|
||||
}
|
||||
|
||||
auto ssInHeap = image->getAllocation()->getBindlessInfo();
|
||||
auto patchLocation = ptrOffset(getCrossThreadData(), arg.bindless);
|
||||
// redescribed image's surface state is after image's state
|
||||
auto bindlessSlotOffset = ssInHeap.surfaceStateOffset + surfaceStateSize;
|
||||
auto patchValue = gfxCoreHelper.getBindlessSurfaceExtendedMessageDescriptorValue(static_cast<uint32_t>(bindlessSlotOffset));
|
||||
patchWithRequiredSize(const_cast<uint8_t *>(patchLocation), sizeof(patchValue), patchValue);
|
||||
|
||||
image->copyRedescribedSurfaceStateToSSH(ptrOffset(ssInHeap.ssPtr, surfaceStateSize), 0u);
|
||||
this->residencyContainer.push_back(ssInHeap.heapAllocation);
|
||||
} else {
|
||||
|
||||
auto ssPtr = ptrOffset(surfaceStateHeapData.get(), getSurfaceStateIndexForBindlessOffset(arg.bindless) * surfaceStateSize);
|
||||
image->copyRedescribedSurfaceStateToSSH(ssPtr, 0u);
|
||||
}
|
||||
} else {
|
||||
image->copyRedescribedSurfaceStateToSSH(surfaceStateHeapData.get(), arg.bindful);
|
||||
}
|
||||
residencyContainer[argIndex] = image->getAllocation();
|
||||
|
||||
return ZE_RESULT_SUCCESS;
|
||||
|
||||
@@ -2433,8 +2433,17 @@ struct MyMockImage : public WhiteBox<::L0::ImageCoreFamily<gfxCoreFamily>> {
|
||||
passedSurfaceStateHeap = surfaceStateHeap;
|
||||
passedSurfaceStateOffset = surfaceStateOffset;
|
||||
}
|
||||
|
||||
// Mock override: performs no copy; it only records the heap pointer and the
// offset it was called with so that tests can inspect them afterwards.
void copyRedescribedSurfaceStateToSSH(void *surfaceStateHeap, const uint32_t surfaceStateOffset) override {
|
||||
passedRedescribedSurfaceStateHeap = surfaceStateHeap;
|
||||
passedRedescribedSurfaceStateOffset = surfaceStateOffset;
|
||||
}
|
||||
|
||||
void *passedSurfaceStateHeap = nullptr;
|
||||
uint32_t passedSurfaceStateOffset = 0;
|
||||
|
||||
void *passedRedescribedSurfaceStateHeap = nullptr;
|
||||
uint32_t passedRedescribedSurfaceStateOffset = 0;
|
||||
};
|
||||
|
||||
HWTEST2_F(SetKernelArg, givenImageAndBindlessKernelWhenSetArgImageThenCopySurfaceStateToSSHCalledWithCorrectArgs, ImageSupport) {
|
||||
@@ -2500,6 +2509,110 @@ HWTEST2_F(SetKernelArg, givenBindlessKernelAndNoAvailableSpaceOnSshWhenSetArgIma
|
||||
EXPECT_EQ(nullptr, bindlessInfo.heapAllocation);
|
||||
}
|
||||
|
||||
// With a bindless heaps helper created for the root device and kernel arg 3
// forced to bindless image addressing, setArgRedescribedImage must succeed and
// hand the mock image a destination pointer that is one surface state past the
// ssPtr stored in the image allocation's bindless info, with offset 0.
HWTEST2_F(SetKernelArg, givenImageBindlessKernelAndGlobalBindlessHelperWhenSetArgRedescribedImageCalledThenCopySurfaceStateToSSHCalledWithCorrectArgs, ImageSupport) {
|
||||
createKernel();
|
||||
|
||||
// Install a bindless heaps helper in the root device environment.
neoDevice->getExecutionEnvironment()->rootDeviceEnvironments[neoDevice->getRootDeviceIndex()]->createBindlessHeapsHelper(neoDevice->getMemoryManager(),
|
||||
neoDevice->getNumGenericSubDevices() > 1,
|
||||
neoDevice->getRootDeviceIndex(),
|
||||
neoDevice->getDeviceBitfield());
|
||||
// The descriptor is exposed as const; const_cast is used to rewrite arg 3 and
// the image addressing mode for this test.
auto &imageArg = const_cast<NEO::ArgDescImage &>(kernel->kernelImmData->getDescriptor().payloadMappings.explicitArgs[3].template as<NEO::ArgDescImage>());
|
||||
auto &addressingMode = kernel->kernelImmData->getDescriptor().kernelAttributes.imageAddressingMode;
|
||||
const_cast<NEO::KernelDescriptor::AddressingMode &>(addressingMode) = NEO::KernelDescriptor::Bindless;
|
||||
imageArg.bindless = 0x0;
|
||||
imageArg.bindful = undefined<SurfaceStateHeapOffset>;
|
||||
ze_image_desc_t desc = {};
|
||||
desc.stype = ZE_STRUCTURE_TYPE_IMAGE_DESC;
|
||||
|
||||
auto imageHW = std::make_unique<MyMockImage<gfxCoreFamily>>();
|
||||
auto ret = imageHW->initialize(device, &desc);
|
||||
auto handle = imageHW->toHandle();
|
||||
ASSERT_EQ(ZE_RESULT_SUCCESS, ret);
|
||||
|
||||
ret = kernel->setArgRedescribedImage(3, handle);
|
||||
EXPECT_EQ(ZE_RESULT_SUCCESS, ret);
|
||||
|
||||
auto &gfxCoreHelper = neoDevice->getGfxCoreHelper();
|
||||
auto surfaceStateSize = gfxCoreHelper.getRenderSurfaceStateSize();
|
||||
|
||||
// Expected destination: ssPtr advanced by one surface state size.
auto expectedSsInHeap = imageHW->getAllocation()->getBindlessInfo();
|
||||
EXPECT_EQ(imageHW->passedRedescribedSurfaceStateHeap, ptrOffset(expectedSsInHeap.ssPtr, surfaceStateSize));
|
||||
EXPECT_EQ(imageHW->passedRedescribedSurfaceStateOffset, 0u);
|
||||
}
|
||||
|
||||
// Builds a mock kernel with a single bindless image argument (bindless offset
// 0x0, bindful undefined) and a one-surface-state kernel heap, then checks that
// setArgRedescribedImage passes the mock image a pointer into
// surfaceStateHeapData at the index mapped from bindless offset 0x0, with
// offset 0.
// NOTE(review): no bindless heaps helper is created in this test body, so this
// presumably exercises the path without a global helper - confirm against the
// fixture.
HWTEST2_F(SetKernelArg, givenImageAndBindlessKernelWhenSetArgRedescribedImageCalledThenCopySurfaceStateToSSHCalledWithCorrectArgs, ImageSupport) {
|
||||
Mock<Module> mockModule(this->device, nullptr);
|
||||
Mock<KernelImp> mockKernel;
|
||||
mockKernel.module = &mockModule;
|
||||
|
||||
mockKernel.descriptor.kernelAttributes.bufferAddressingMode = NEO::KernelDescriptor::BindlessAndStateless;
|
||||
mockKernel.descriptor.kernelAttributes.imageAddressingMode = NEO::KernelDescriptor::Bindless;
|
||||
|
||||
// Single image argument: bindless offset 0x0, no bindful offset.
auto argDescriptor = NEO::ArgDescriptor(NEO::ArgDescriptor::ArgTImage);
|
||||
argDescriptor.as<NEO::ArgDescImage>() = NEO::ArgDescImage();
|
||||
argDescriptor.as<NEO::ArgDescImage>().bindful = NEO::undefined<NEO::SurfaceStateHeapOffset>;
|
||||
argDescriptor.as<NEO::ArgDescImage>().bindless = 0x0;
|
||||
mockKernel.crossThreadData = std::make_unique<uint8_t[]>(4 * sizeof(uint64_t));
|
||||
mockKernel.crossThreadDataSize = 4 * sizeof(uint64_t);
|
||||
mockKernel.descriptor.payloadMappings.explicitArgs.push_back(argDescriptor);
|
||||
auto &gfxCoreHelper = neoDevice->getGfxCoreHelper();
|
||||
auto surfaceStateSize = gfxCoreHelper.getRenderSurfaceStateSize();
|
||||
|
||||
// Kernel-local heap sized for exactly one surface state.
mockKernel.surfaceStateHeapData = std::make_unique<uint8_t[]>(surfaceStateSize);
|
||||
mockKernel.descriptor.initBindlessOffsetToSurfaceState();
|
||||
mockKernel.residencyContainer.resize(1);
|
||||
|
||||
ze_image_desc_t desc = {};
|
||||
desc.stype = ZE_STRUCTURE_TYPE_IMAGE_DESC;
|
||||
|
||||
auto imageHW = std::make_unique<MyMockImage<gfxCoreFamily>>();
|
||||
auto ret = imageHW->initialize(device, &desc);
|
||||
auto handle = imageHW->toHandle();
|
||||
ASSERT_EQ(ZE_RESULT_SUCCESS, ret);
|
||||
|
||||
ret = mockKernel.setArgRedescribedImage(0, handle);
|
||||
EXPECT_EQ(ZE_RESULT_SUCCESS, ret);
|
||||
|
||||
// Expected destination: heap base + (surface state index for offset 0x0) * surfaceStateSize.
auto expectedSsInHeap = ptrOffset(mockKernel.surfaceStateHeapData.get(), mockKernel.kernelImmData->getDescriptor().getBindlessOffsetToSurfaceState().find(0x0)->second * surfaceStateSize);
|
||||
EXPECT_EQ(imageHW->passedRedescribedSurfaceStateHeap, expectedSsInHeap);
|
||||
EXPECT_EQ(imageHW->passedRedescribedSurfaceStateOffset, 0u);
|
||||
}
|
||||
|
||||
// Failure path: the global SSH of the bindless helper is fully consumed and the
// mock memory manager is told to fail further allocations, so
// setArgRedescribedImage is expected to return
// ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY and leave the image allocation's bindless
// info with null ssPtr and heapAllocation.
HWTEST2_F(SetKernelArg, givenBindlessKernelAndNoAvailableSpaceOnSshWhenSetArgRedescribedImageCalledThenOutOfMemoryErrorReturned, ImageSupport) {
|
||||
createKernel();
|
||||
|
||||
auto mockMemManager = static_cast<MockMemoryManager *>(neoDevice->getMemoryManager());
|
||||
// Raw pointer is handed to bindlessHeapsHelper.reset() below; it is kept in a
// local so the test can reach globalSsh later.
auto bindlessHelper = new MockBindlesHeapsHelper(mockMemManager,
|
||||
neoDevice->getNumGenericSubDevices() > 1,
|
||||
neoDevice->getRootDeviceIndex(),
|
||||
neoDevice->getDeviceBitfield());
|
||||
neoDevice->getExecutionEnvironment()->rootDeviceEnvironments[neoDevice->getRootDeviceIndex()]->bindlessHeapsHelper.reset(bindlessHelper);
|
||||
|
||||
// Force arg 3 and the kernel's image addressing mode to bindless.
auto &imageArg = const_cast<NEO::ArgDescImage &>(kernel->kernelImmData->getDescriptor().payloadMappings.explicitArgs[3].template as<NEO::ArgDescImage>());
|
||||
auto &addressingMode = kernel->kernelImmData->getDescriptor().kernelAttributes.imageAddressingMode;
|
||||
const_cast<NEO::KernelDescriptor::AddressingMode &>(addressingMode) = NEO::KernelDescriptor::Bindless;
|
||||
imageArg.bindless = 0x0;
|
||||
imageArg.bindful = undefined<SurfaceStateHeapOffset>;
|
||||
ze_image_desc_t desc = {};
|
||||
desc.stype = ZE_STRUCTURE_TYPE_IMAGE_DESC;
|
||||
|
||||
auto imageHW = std::make_unique<MyMockImage<gfxCoreFamily>>();
|
||||
auto ret = imageHW->initialize(device, &desc);
|
||||
auto handle = imageHW->toHandle();
|
||||
ASSERT_EQ(ZE_RESULT_SUCCESS, ret);
|
||||
|
||||
// Failure flags are set only after the image has been initialized
// successfully; then all remaining space in the global SSH is taken.
mockMemManager->failInDevicePool = true;
|
||||
mockMemManager->failAllocateSystemMemory = true;
|
||||
bindlessHelper->globalSsh->getSpace(bindlessHelper->globalSsh->getAvailableSpace());
|
||||
|
||||
ret = kernel->setArgRedescribedImage(3, handle);
|
||||
EXPECT_EQ(ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY, ret);
|
||||
|
||||
auto bindlessInfo = imageHW->getAllocation()->getBindlessInfo();
|
||||
EXPECT_EQ(nullptr, bindlessInfo.ssPtr);
|
||||
EXPECT_EQ(nullptr, bindlessInfo.heapAllocation);
|
||||
}
|
||||
|
||||
HWTEST2_F(SetKernelArg, givenBindlessKernelAndNoAvailableSpaceOnSshWhenSetArgBufferCalledThenOutOfMemoryErrorReturned, MatchAny) {
|
||||
|
||||
auto mockMemManager = static_cast<MockMemoryManager *>(neoDevice->getMemoryManager());
|
||||
|
||||
@@ -751,7 +751,7 @@ size_t EncodeDispatchKernel<Family>::getSizeRequiredSsh(const KernelInfo &kernel
|
||||
size_t requiredSshSize = kernelInfo.heapInfo.surfaceStateHeapSize;
|
||||
bool isBindlessKernel = NEO::KernelDescriptor ::isBindlessAddressingKernel(kernelInfo.kernelDescriptor);
|
||||
if (isBindlessKernel) {
|
||||
requiredSshSize = kernelInfo.kernelDescriptor.kernelAttributes.numArgsStateful * sizeof(typename Family::RENDER_SURFACE_STATE);
|
||||
requiredSshSize = std::max(requiredSshSize, kernelInfo.kernelDescriptor.kernelAttributes.numArgsStateful * sizeof(typename Family::RENDER_SURFACE_STATE));
|
||||
}
|
||||
requiredSshSize = alignUp(requiredSshSize, EncodeDispatchKernel<Family>::getDefaultSshAlignment());
|
||||
return requiredSshSize;
|
||||
|
||||
@@ -7,6 +7,9 @@
|
||||
|
||||
#include "shared/source/helpers/bindless_heaps_helper.h"
|
||||
|
||||
#include "shared/source/execution_environment/execution_environment.h"
|
||||
#include "shared/source/execution_environment/root_device_environment.h"
|
||||
#include "shared/source/helpers/gfx_core_helper.h"
|
||||
#include "shared/source/helpers/string.h"
|
||||
#include "shared/source/indirect_heap/indirect_heap.h"
|
||||
#include "shared/source/memory_manager/allocation_properties.h"
|
||||
@@ -18,7 +21,13 @@ constexpr size_t globalSshAllocationSize = 4 * MemoryConstants::pageSize64k;
|
||||
constexpr size_t borderColorAlphaOffset = alignUp(4 * sizeof(float), MemoryConstants::cacheLineSize);
|
||||
using BindlesHeapType = BindlessHeapsHelper::BindlesHeapType;
|
||||
|
||||
BindlessHeapsHelper::BindlessHeapsHelper(MemoryManager *memManager, bool isMultiOsContextCapable, const uint32_t rootDeviceIndex, DeviceBitfield deviceBitfield) : memManager(memManager), isMultiOsContextCapable(isMultiOsContextCapable), rootDeviceIndex(rootDeviceIndex), deviceBitfield(deviceBitfield) {
|
||||
BindlessHeapsHelper::BindlessHeapsHelper(MemoryManager *memManager, bool isMultiOsContextCapable,
|
||||
const uint32_t rootDeviceIndex, DeviceBitfield deviceBitfield) : surfaceStateSize(memManager->peekExecutionEnvironment().rootDeviceEnvironments[rootDeviceIndex]->getHelper<GfxCoreHelper>().getRenderSurfaceStateSize()),
|
||||
memManager(memManager),
|
||||
isMultiOsContextCapable(isMultiOsContextCapable),
|
||||
rootDeviceIndex(rootDeviceIndex),
|
||||
deviceBitfield(deviceBitfield) {
|
||||
|
||||
for (auto heapType = 0; heapType < BindlesHeapType::NUM_HEAP_TYPES; heapType++) {
|
||||
auto allocInFrontWindow = heapType != BindlesHeapType::GLOBAL_DSH;
|
||||
auto heapAllocation = getHeapAllocation(MemoryConstants::pageSize64k, MemoryConstants::pageSize64k, allocInFrontWindow);
|
||||
@@ -58,9 +67,11 @@ SurfaceStateInHeapInfo BindlessHeapsHelper::allocateSSInHeap(size_t ssSize, Grap
|
||||
std::lock_guard<std::mutex> autolock(this->mtx);
|
||||
if (heapType == BindlesHeapType::GLOBAL_SSH) {
|
||||
|
||||
if (surfaceStateInHeapVectorReuse.size()) {
|
||||
SurfaceStateInHeapInfo surfaceStateFromVector = surfaceStateInHeapVectorReuse.back();
|
||||
surfaceStateInHeapVectorReuse.pop_back();
|
||||
int index = getReusedSshVectorIndex(ssSize);
|
||||
|
||||
if (surfaceStateInHeapVectorReuse[index].size()) {
|
||||
SurfaceStateInHeapInfo surfaceStateFromVector = surfaceStateInHeapVectorReuse[index].back();
|
||||
surfaceStateInHeapVectorReuse[index].pop_back();
|
||||
return surfaceStateFromVector;
|
||||
}
|
||||
}
|
||||
@@ -71,7 +82,7 @@ SurfaceStateInHeapInfo BindlessHeapsHelper::allocateSSInHeap(size_t ssSize, Grap
|
||||
memset(ptrInHeap, 0, ssSize);
|
||||
auto bindlessOffset = heap->getGraphicsAllocation()->getGpuAddress() - heap->getGraphicsAllocation()->getGpuBaseAddress() + heap->getUsed() - ssSize;
|
||||
|
||||
bindlesInfo = SurfaceStateInHeapInfo{heap->getGraphicsAllocation(), bindlessOffset, ptrInHeap};
|
||||
bindlesInfo = SurfaceStateInHeapInfo{heap->getGraphicsAllocation(), bindlessOffset, ptrInHeap, ssSize};
|
||||
}
|
||||
|
||||
return bindlesInfo;
|
||||
@@ -119,9 +130,11 @@ bool BindlessHeapsHelper::growHeap(BindlesHeapType heapType) {
|
||||
|
||||
void BindlessHeapsHelper::placeSSAllocationInReuseVectorOnFreeMemory(GraphicsAllocation *gfxAllocation) {
|
||||
auto ssAllocatedInfo = gfxAllocation->getBindlessInfo();
|
||||
|
||||
if (ssAllocatedInfo.heapAllocation != nullptr) {
|
||||
std::lock_guard<std::mutex> autolock(this->mtx);
|
||||
surfaceStateInHeapVectorReuse.push_back(std::move(ssAllocatedInfo));
|
||||
int index = getReusedSshVectorIndex(ssAllocatedInfo.ssSize);
|
||||
surfaceStateInHeapVectorReuse[index].push_back(std::move(ssAllocatedInfo));
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
@@ -46,7 +46,18 @@ class BindlessHeapsHelper {
|
||||
return globalBindlessDsh;
|
||||
}
|
||||
|
||||
// Maps a bindless slot size to an index into surfaceStateInHeapVectorReuse:
// returns 1 when ssSize equals two surface states, otherwise 0.
// Any size other than one or two surface states trips UNRECOVERABLE_IF
// (presumably a fatal abort - confirm the macro's definition).
int getReusedSshVectorIndex(size_t ssSize) {
|
||||
int index = 0;
|
||||
if (ssSize == 2 * surfaceStateSize) {
|
||||
index = 1;
|
||||
} else {
|
||||
// Only single-surface-state slots are valid on this branch.
UNRECOVERABLE_IF(ssSize != surfaceStateSize);
|
||||
}
|
||||
return index;
|
||||
}
|
||||
|
||||
protected:
|
||||
const size_t surfaceStateSize;
|
||||
bool growHeap(BindlesHeapType heapType);
|
||||
MemoryManager *memManager = nullptr;
|
||||
bool isMultiOsContextCapable = false;
|
||||
@@ -54,7 +65,7 @@ class BindlessHeapsHelper {
|
||||
std::unique_ptr<IndirectHeap> surfaceStateHeaps[BindlesHeapType::NUM_HEAP_TYPES];
|
||||
GraphicsAllocation *borderColorStates;
|
||||
std::vector<GraphicsAllocation *> ssHeapsAllocations;
|
||||
std::vector<SurfaceStateInHeapInfo> surfaceStateInHeapVectorReuse;
|
||||
std::vector<SurfaceStateInHeapInfo> surfaceStateInHeapVectorReuse[2];
|
||||
std::mutex mtx;
|
||||
DeviceBitfield deviceBitfield;
|
||||
bool globalBindlessDsh = false;
|
||||
|
||||
@@ -35,7 +35,6 @@ void StateBaseAddressHelper<GfxFamily>::programStateBaseAddress(
|
||||
StateBaseAddressHelperArgs<GfxFamily> &args) {
|
||||
|
||||
*args.stateBaseAddressCmd = GfxFamily::cmdInitStateBaseAddress;
|
||||
|
||||
const auto surfaceStateCount = getMaxBindlessSurfaceStates();
|
||||
args.stateBaseAddressCmd->setBindlessSurfaceStateSize(surfaceStateCount);
|
||||
|
||||
@@ -73,6 +72,7 @@ void StateBaseAddressHelper<GfxFamily>::programStateBaseAddress(
|
||||
|
||||
args.stateBaseAddressCmd->setBindlessSurfaceStateBaseAddressModifyEnable(true);
|
||||
args.stateBaseAddressCmd->setBindlessSurfaceStateBaseAddress(args.globalHeapsBaseAddress);
|
||||
args.stateBaseAddressCmd->setBindlessSurfaceStateSize(surfaceStateCount);
|
||||
} else {
|
||||
if (args.dsh) {
|
||||
args.stateBaseAddressCmd->setDynamicStateBaseAddressModifyEnable(true);
|
||||
|
||||
@@ -18,6 +18,8 @@ void StateBaseAddressHelper<GfxFamily>::appendStateBaseAddressParameters(
|
||||
if (args.bindlessSurfaceStateBaseAddress != 0) {
|
||||
args.stateBaseAddressCmd->setBindlessSurfaceStateBaseAddress(args.bindlessSurfaceStateBaseAddress);
|
||||
args.stateBaseAddressCmd->setBindlessSurfaceStateBaseAddressModifyEnable(true);
|
||||
const auto surfaceStateCount = getMaxBindlessSurfaceStates();
|
||||
args.stateBaseAddressCmd->setBindlessSurfaceStateSize(surfaceStateCount);
|
||||
} else if (args.ssh) {
|
||||
args.stateBaseAddressCmd->setBindlessSurfaceStateBaseAddressModifyEnable(true);
|
||||
args.stateBaseAddressCmd->setBindlessSurfaceStateBaseAddress(args.ssh->getHeapGpuBase());
|
||||
|
||||
@@ -17,6 +17,8 @@ void StateBaseAddressHelper<GfxFamily>::appendStateBaseAddressParameters(
|
||||
if (args.bindlessSurfaceStateBaseAddress != 0) {
|
||||
args.stateBaseAddressCmd->setBindlessSurfaceStateBaseAddress(args.bindlessSurfaceStateBaseAddress);
|
||||
args.stateBaseAddressCmd->setBindlessSurfaceStateBaseAddressModifyEnable(true);
|
||||
const auto surfaceStateCount = getMaxBindlessSurfaceStates();
|
||||
args.stateBaseAddressCmd->setBindlessSurfaceStateSize(surfaceStateCount);
|
||||
} else if (args.ssh) {
|
||||
args.stateBaseAddressCmd->setBindlessSurfaceStateBaseAddressModifyEnable(true);
|
||||
args.stateBaseAddressCmd->setBindlessSurfaceStateBaseAddress(args.ssh->getHeapGpuBase());
|
||||
|
||||
@@ -51,6 +51,8 @@ void StateBaseAddressHelper<GfxFamily>::appendStateBaseAddressParameters(
|
||||
if (args.bindlessSurfaceStateBaseAddress != 0) {
|
||||
args.stateBaseAddressCmd->setBindlessSurfaceStateBaseAddress(args.bindlessSurfaceStateBaseAddress);
|
||||
args.stateBaseAddressCmd->setBindlessSurfaceStateBaseAddressModifyEnable(true);
|
||||
const auto surfaceStateCount = getMaxBindlessSurfaceStates();
|
||||
args.stateBaseAddressCmd->setBindlessSurfaceStateSize(surfaceStateCount);
|
||||
} else if (args.ssh) {
|
||||
args.stateBaseAddressCmd->setBindlessSurfaceStateBaseAddress(args.ssh->getHeapGpuBase());
|
||||
args.stateBaseAddressCmd->setBindlessSurfaceStateBaseAddressModifyEnable(true);
|
||||
|
||||
@@ -50,6 +50,7 @@ struct SurfaceStateInHeapInfo {
|
||||
GraphicsAllocation *heapAllocation;
|
||||
uint64_t surfaceStateOffset;
|
||||
void *ssPtr;
|
||||
size_t ssSize;
|
||||
};
|
||||
|
||||
class GraphicsAllocation : public IDNode<GraphicsAllocation> {
|
||||
|
||||
@@ -1011,7 +1011,9 @@ bool MemoryManager::allocateBindlessSlot(GraphicsAllocation *allocation) {
|
||||
|
||||
if (bindlessHelper && allocation->getBindlessOffset() == std::numeric_limits<uint64_t>::max()) {
|
||||
auto &gfxCoreHelper = peekExecutionEnvironment().rootDeviceEnvironments[allocation->getRootDeviceIndex()]->getHelper<GfxCoreHelper>();
|
||||
auto surfaceStateSize = gfxCoreHelper.getRenderSurfaceStateSize();
|
||||
const auto isImage = allocation->getAllocationType() == AllocationType::IMAGE || allocation->getAllocationType() == AllocationType::SHARED_IMAGE;
|
||||
auto surfStateCount = isImage ? 2 : 1;
|
||||
auto surfaceStateSize = surfStateCount * gfxCoreHelper.getRenderSurfaceStateSize();
|
||||
|
||||
auto surfaceStateInfo = bindlessHelper->allocateSSInHeap(surfaceStateSize, allocation, NEO::BindlessHeapsHelper::GLOBAL_SSH);
|
||||
if (surfaceStateInfo.heapAllocation == nullptr) {
|
||||
|
||||
@@ -1367,6 +1367,9 @@ GraphicsAllocation *WddmMemoryManager::allocateGraphicsMemoryInDevicePool(const
|
||||
if (heapAssigner.useInternal32BitHeap(allocationData.type)) {
|
||||
auto gmmHelper = getGmmHelper(wddmAllocation->getRootDeviceIndex());
|
||||
wddmAllocation->setGpuBaseAddress(gmmHelper->canonize(getInternalHeapBaseAddress(wddmAllocation->getRootDeviceIndex(), true)));
|
||||
} else if (heapAssigner.useExternal32BitHeap(allocationData.type)) {
|
||||
auto gmmHelper = getGmmHelper(wddmAllocation->getRootDeviceIndex());
|
||||
wddmAllocation->setGpuBaseAddress(gmmHelper->canonize(getExternalHeapBaseAddress(wddmAllocation->getRootDeviceIndex(), true)));
|
||||
}
|
||||
|
||||
status = AllocationStatus::Success;
|
||||
|
||||
@@ -38,6 +38,7 @@ class MockBindlesHeapsHelper : public BindlessHeapsHelper {
|
||||
using BaseClass::ssHeapsAllocations;
|
||||
using BaseClass::surfaceStateHeaps;
|
||||
using BaseClass::surfaceStateInHeapVectorReuse;
|
||||
using BaseClass::surfaceStateSize;
|
||||
|
||||
IndirectHeap *specialSsh;
|
||||
IndirectHeap *globalSsh;
|
||||
|
||||
@@ -272,7 +272,7 @@ void WddmMock::virtualFree(void *ptr, size_t size) {
|
||||
void WddmMock::releaseReservedAddress(void *reservedAddress) {
|
||||
releaseReservedAddressResult.called++;
|
||||
if (reservedAddress != nullptr) {
|
||||
std::set<void *>::iterator it;
|
||||
std::unordered_multiset<void *>::iterator it;
|
||||
it = reservedAddresses.find(reservedAddress);
|
||||
EXPECT_NE(reservedAddresses.end(), it);
|
||||
reservedAddresses.erase(it);
|
||||
@@ -284,9 +284,8 @@ bool WddmMock::reserveValidAddressRange(size_t size, void *&reservedMem) {
|
||||
reserveValidAddressRangeResult.called++;
|
||||
bool ret = Wddm::reserveValidAddressRange(size, reservedMem);
|
||||
if (reservedMem != nullptr) {
|
||||
std::set<void *>::iterator it;
|
||||
std::unordered_multiset<void *>::iterator it;
|
||||
it = reservedAddresses.find(reservedMem);
|
||||
EXPECT_EQ(reservedAddresses.end(), it);
|
||||
reservedAddresses.insert(reservedMem);
|
||||
}
|
||||
return ret;
|
||||
|
||||
@@ -18,7 +18,7 @@
|
||||
|
||||
#include <limits>
|
||||
#include <memory>
|
||||
#include <set>
|
||||
#include <unordered_set>
|
||||
|
||||
namespace NEO {
|
||||
class GraphicsAllocation;
|
||||
@@ -181,7 +181,7 @@ class WddmMock : public Wddm {
|
||||
bool callBaseDestroyAllocations = true;
|
||||
bool failOpenSharedHandle = false;
|
||||
bool callBaseMapGpuVa = true;
|
||||
std::set<void *> reservedAddresses;
|
||||
std::unordered_multiset<void *> reservedAddresses;
|
||||
uintptr_t virtualAllocAddress = NEO::windowsMinAddress;
|
||||
bool kmDafEnabled = false;
|
||||
uint64_t mockPagingFence = 0u;
|
||||
|
||||
@@ -111,6 +111,26 @@ TEST_F(BindlessHeapsHelperTests, givenBindlessHeapHelperWhenAllocateSsInHeapThen
|
||||
EXPECT_EQ(ssInHeapInfo.ssPtr, allocInHeapPtr);
|
||||
}
|
||||
|
||||
// For allocations of type IMAGE and SHARED_IMAGE, allocateBindlessSlot must
// succeed and the resulting bindless info must report a slot size of two
// surface states.
TEST_F(BindlessHeapsHelperTests, givenBindlessHeapHelperWhenAllocateSsInHeapForImageThenTwoBindlessSlotsAreAllocated) {
|
||||
auto bindlessHeapHelper = std::make_unique<MockBindlesHeapsHelper>(getMemoryManager(), false, rootDeviceIndex, devBitfield);
|
||||
// Read the size before ownership of the helper is released to the root device environment.
auto surfaceStateSize = bindlessHeapHelper->surfaceStateSize;
|
||||
memManager->mockExecutionEnvironment->rootDeviceEnvironments[rootDeviceIndex]->bindlessHeapsHelper.reset(bindlessHeapHelper.release());
|
||||
|
||||
MockGraphicsAllocation alloc;
|
||||
alloc.allocationType = AllocationType::IMAGE;
|
||||
EXPECT_TRUE(getMemoryManager()->allocateBindlessSlot(&alloc));
|
||||
auto ssInHeapInfo1 = alloc.getBindlessInfo();
|
||||
|
||||
EXPECT_EQ(surfaceStateSize * 2, ssInHeapInfo1.ssSize);
|
||||
|
||||
// Same expectation for a shared image allocation.
MockGraphicsAllocation alloc2;
|
||||
alloc2.allocationType = AllocationType::SHARED_IMAGE;
|
||||
EXPECT_TRUE(getMemoryManager()->allocateBindlessSlot(&alloc2));
|
||||
auto ssInHeapInfo2 = alloc2.getBindlessInfo();
|
||||
|
||||
EXPECT_EQ(surfaceStateSize * 2, ssInHeapInfo2.ssSize);
|
||||
}
|
||||
|
||||
TEST_F(BindlessHeapsHelperTests, givenBindlessHeapHelperWhenAllocateSsInHeapTwiceForTheSameAllocationThenTheSameOffsetReturned) {
|
||||
auto bindlessHeapHelper = std::make_unique<MockBindlesHeapsHelper>(getMemoryManager(), false, rootDeviceIndex, devBitfield);
|
||||
|
||||
@@ -213,10 +233,22 @@ TEST_F(BindlessHeapsHelperTests, givenBindlessHeapHelperWhenFreeGraphicsMemoryIs
|
||||
auto ssInHeapInfo = alloc->getBindlessInfo();
|
||||
|
||||
memManager->freeGraphicsMemory(alloc);
|
||||
EXPECT_EQ(bindlessHeapHelperPtr->surfaceStateInHeapVectorReuse.size(), 1u);
|
||||
auto ssInHeapInfoFromReuseVector = bindlessHeapHelperPtr->surfaceStateInHeapVectorReuse.front();
|
||||
EXPECT_EQ(bindlessHeapHelperPtr->surfaceStateInHeapVectorReuse[0].size(), 1u);
|
||||
auto ssInHeapInfoFromReuseVector = bindlessHeapHelperPtr->surfaceStateInHeapVectorReuse[0].front();
|
||||
EXPECT_EQ(ssInHeapInfoFromReuseVector.surfaceStateOffset, ssInHeapInfo.surfaceStateOffset);
|
||||
EXPECT_EQ(ssInHeapInfoFromReuseVector.ssPtr, ssInHeapInfo.ssPtr);
|
||||
|
||||
MockGraphicsAllocation *alloc2 = new MockGraphicsAllocation;
|
||||
alloc2->allocationType = AllocationType::IMAGE;
|
||||
memManager->allocateBindlessSlot(alloc2);
|
||||
|
||||
auto ssInHeapInfo2 = alloc2->getBindlessInfo();
|
||||
|
||||
memManager->freeGraphicsMemory(alloc2);
|
||||
EXPECT_EQ(bindlessHeapHelperPtr->surfaceStateInHeapVectorReuse[1].size(), 1u);
|
||||
ssInHeapInfoFromReuseVector = bindlessHeapHelperPtr->surfaceStateInHeapVectorReuse[1].front();
|
||||
EXPECT_EQ(ssInHeapInfoFromReuseVector.surfaceStateOffset, ssInHeapInfo2.surfaceStateOffset);
|
||||
EXPECT_EQ(ssInHeapInfoFromReuseVector.ssPtr, ssInHeapInfo2.ssPtr);
|
||||
}
|
||||
|
||||
TEST_F(BindlessHeapsHelperTests, givenBindlessHeapHelperWhenAllocatingBindlessSlotTwiceThenNewSlotIsNotAllocatedAndTrueIsReturned) {
|
||||
@@ -247,13 +279,13 @@ TEST_F(BindlessHeapsHelperTests, givenBindlessHeapHelperPreviousAllocationThenIt
|
||||
|
||||
auto ssInHeapInfo = alloc->getBindlessInfo();
|
||||
memManager->freeGraphicsMemory(alloc);
|
||||
EXPECT_EQ(bindlessHeapHelperPtr->surfaceStateInHeapVectorReuse.size(), 1u);
|
||||
EXPECT_EQ(bindlessHeapHelperPtr->surfaceStateInHeapVectorReuse[0].size(), 1u);
|
||||
MockGraphicsAllocation *alloc2 = new MockGraphicsAllocation;
|
||||
|
||||
memManager->allocateBindlessSlot(alloc2);
|
||||
auto reusedSSinHeapInfo = alloc2->getBindlessInfo();
|
||||
|
||||
EXPECT_EQ(bindlessHeapHelperPtr->surfaceStateInHeapVectorReuse.size(), 0u);
|
||||
EXPECT_EQ(bindlessHeapHelperPtr->surfaceStateInHeapVectorReuse[0].size(), 0u);
|
||||
EXPECT_EQ(ssInHeapInfo.surfaceStateOffset, reusedSSinHeapInfo.surfaceStateOffset);
|
||||
EXPECT_EQ(ssInHeapInfo.ssPtr, reusedSSinHeapInfo.ssPtr);
|
||||
memManager->freeGraphicsMemory(alloc2);
|
||||
|
||||
@@ -560,8 +560,56 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, SbaTest,
|
||||
|
||||
EXPECT_EQ(0u, sbaCmd.getBindlessSurfaceStateBaseAddress());
|
||||
EXPECT_FALSE(sbaCmd.getBindlessSurfaceStateBaseAddressModifyEnable());
|
||||
uint32_t defaultBindlessSurfaceStateSize = StateBaseAddressHelper<FamilyType>::getMaxBindlessSurfaceStates();
|
||||
EXPECT_EQ(defaultBindlessSurfaceStateSize, sbaCmd.getBindlessSurfaceStateSize());
|
||||
}
|
||||
|
||||
// Programs STATE_BASE_ADDRESS with general-state, indirect-object and
// instruction base addresses plus a non-zero bindlessSurfaceStateBaseAddress,
// and no dynamic-state or surface-state base address set in this body. It
// verifies that the dynamic and surface state fields stay unprogrammed, and
// that the bindless base address, its modify-enable bit and the bindless size
// are set.
// NOTE(review): the test name says "BindlessSurfaceStateSizeSetToZero", but the
// final assertion expects getMaxBindlessSurfaceStates() - the name looks stale;
// confirm and rename if so.
HWCMDTEST_F(IGFX_XE_HP_CORE, SbaTest,
|
||||
givenNoHeapsProvidedAndBindlessBaseSetWhenSBAIsProgrammedThenBindlessSurfaceStateSizeSetToZeroAndBaseAddressSetToPassedValue) {
|
||||
using STATE_BASE_ADDRESS = typename FamilyType::STATE_BASE_ADDRESS;
|
||||
|
||||
auto gmmHelper = pDevice->getGmmHelper();
|
||||
|
||||
constexpr uint64_t instructionHeapBase = 0x10000;
|
||||
constexpr uint64_t internalHeapBase = 0x10000;
|
||||
constexpr uint64_t generalStateBase = 0x30000;
|
||||
|
||||
STATE_BASE_ADDRESS sbaCmd;
|
||||
StateBaseAddressHelperArgs<FamilyType> args = createSbaHelperArgs<FamilyType>(&sbaCmd, gmmHelper);
|
||||
args.generalStateBaseAddress = generalStateBase;
|
||||
args.indirectObjectHeapBaseAddress = internalHeapBase;
|
||||
args.instructionHeapBaseAddress = instructionHeapBase;
|
||||
args.setGeneralStateBaseAddress = true;
|
||||
args.setInstructionStateBaseAddress = true;
|
||||
args.bindlessSurfaceStateBaseAddress = 0x90004000;
|
||||
|
||||
StateBaseAddressHelper<FamilyType>::programStateBaseAddress(args);
|
||||
|
||||
// Dynamic state: expected to be left unprogrammed.
EXPECT_FALSE(sbaCmd.getDynamicStateBaseAddressModifyEnable());
|
||||
EXPECT_FALSE(sbaCmd.getDynamicStateBufferSizeModifyEnable());
|
||||
EXPECT_EQ(0u, sbaCmd.getDynamicStateBaseAddress());
|
||||
EXPECT_EQ(0u, sbaCmd.getDynamicStateBufferSize());
|
||||
|
||||
// Surface state: expected to be left unprogrammed.
EXPECT_FALSE(sbaCmd.getSurfaceStateBaseAddressModifyEnable());
|
||||
EXPECT_EQ(0u, sbaCmd.getSurfaceStateBaseAddress());
|
||||
|
||||
EXPECT_TRUE(sbaCmd.getInstructionBaseAddressModifyEnable());
|
||||
EXPECT_EQ(instructionHeapBase, sbaCmd.getInstructionBaseAddress());
|
||||
EXPECT_TRUE(sbaCmd.getInstructionBufferSizeModifyEnable());
|
||||
EXPECT_EQ(MemoryConstants::sizeOf4GBinPageEntities, sbaCmd.getInstructionBufferSize());
|
||||
|
||||
EXPECT_TRUE(sbaCmd.getGeneralStateBaseAddressModifyEnable());
|
||||
EXPECT_TRUE(sbaCmd.getGeneralStateBufferSizeModifyEnable());
|
||||
// On 64-bit builds the expected general state base is derived from
// internalHeapBase rather than generalStateBase.
if constexpr (is64bit) {
|
||||
EXPECT_EQ(gmmHelper->decanonize(internalHeapBase), sbaCmd.getGeneralStateBaseAddress());
|
||||
} else {
|
||||
EXPECT_EQ(generalStateBase, sbaCmd.getGeneralStateBaseAddress());
|
||||
}
|
||||
EXPECT_EQ(0xfffffu, sbaCmd.getGeneralStateBufferSize());
|
||||
|
||||
// Bindless: base address taken from args, modify-enable set, size equal to the
// helper-reported maximum.
auto surfaceStateCount = StateBaseAddressHelper<FamilyType>::getMaxBindlessSurfaceStates();
|
||||
EXPECT_EQ(0x90004000u, sbaCmd.getBindlessSurfaceStateBaseAddress());
|
||||
EXPECT_TRUE(sbaCmd.getBindlessSurfaceStateBaseAddressModifyEnable());
|
||||
EXPECT_EQ(surfaceStateCount, sbaCmd.getBindlessSurfaceStateSize());
|
||||
}
|
||||
|
||||
@@ -719,15 +767,22 @@ HWTEST2_F(SbaTest, givenStateBaseAddressPropertiesWhenSettingBindlessSurfaceStat
|
||||
StateBaseAddressHelperArgs<FamilyType> args = createSbaHelperArgs<FamilyType>(&sbaCmd, gmmHelper, &sbaProperties);
|
||||
|
||||
StateBaseAddressHelper<FamilyType>::programStateBaseAddress(args);
|
||||
|
||||
EXPECT_EQ(defaultBindlessSurfaceStateSize, sbaCmd.getBindlessSurfaceStateSize());
|
||||
EXPECT_EQ(0u, sbaCmd.getBindlessSurfaceStateBaseAddress());
|
||||
EXPECT_FALSE(sbaCmd.getBindlessSurfaceStateBaseAddressModifyEnable());
|
||||
|
||||
sbaCmd = FamilyType::cmdInitStateBaseAddress;
|
||||
args.bindlessSurfaceStateBaseAddress = 0x80004000;
|
||||
StateBaseAddressHelper<FamilyType>::programStateBaseAddress(args);
|
||||
|
||||
EXPECT_EQ(defaultBindlessSurfaceStateSize, sbaCmd.getBindlessSurfaceStateSize());
|
||||
EXPECT_EQ(0x80004000u, sbaCmd.getBindlessSurfaceStateBaseAddress());
|
||||
EXPECT_TRUE(sbaCmd.getBindlessSurfaceStateBaseAddressModifyEnable());
|
||||
|
||||
sbaProperties.setPropertiesBindingTableSurfaceState(surfaceHeapBase, surfaceHeapSize, surfaceHeapBase, surfaceHeapSize);
|
||||
|
||||
sbaCmd = FamilyType::cmdInitStateBaseAddress;
|
||||
|
||||
args.bindlessSurfaceStateBaseAddress = 0;
|
||||
StateBaseAddressHelper<FamilyType>::programStateBaseAddress(args);
|
||||
|
||||
EXPECT_EQ(surfaceHeapSize, sbaCmd.getBindlessSurfaceStateSize());
|
||||
|
||||
@@ -7,6 +7,7 @@
|
||||
|
||||
#include "shared/source/execution_environment/root_device_environment.h"
|
||||
#include "shared/source/gmm_helper/gmm_helper.h"
|
||||
#include "shared/source/helpers/bindless_heaps_helper.h"
|
||||
#include "shared/source/os_interface/device_factory.h"
|
||||
#include "shared/source/os_interface/os_interface.h"
|
||||
#include "shared/test/common/fixtures/device_fixture.h"
|
||||
@@ -18,15 +19,17 @@
|
||||
|
||||
namespace NEO {
|
||||
|
||||
class WddmMemManagerFixture {
|
||||
class GlobalBindlessWddmMemManagerFixture {
|
||||
public:
|
||||
struct FrontWindowMemManagerMock : public MockWddmMemoryManager {
|
||||
using MemoryManager::allocate32BitGraphicsMemoryImpl;
|
||||
|
||||
FrontWindowMemManagerMock(NEO::ExecutionEnvironment &executionEnvironment) : MockWddmMemoryManager(executionEnvironment) {}
|
||||
FrontWindowMemManagerMock(bool enable64kbPages, bool enableLocalMemory, NEO::ExecutionEnvironment &executionEnvironment) : MockWddmMemoryManager(enable64kbPages, enableLocalMemory, executionEnvironment) {}
|
||||
};
|
||||
|
||||
void setUp() {
|
||||
DebugManagerStateRestore dbgRestorer;
|
||||
|
||||
DebugManager.flags.UseExternalAllocatorForSshAndDsh.set(true);
|
||||
executionEnvironment = std::make_unique<ExecutionEnvironment>();
|
||||
executionEnvironment->prepareRootDeviceEnvironments(1);
|
||||
@@ -40,13 +43,15 @@ class WddmMemManagerFixture {
|
||||
}
|
||||
void tearDown() {
|
||||
}
|
||||
std::unique_ptr<FrontWindowMemManagerMock> memManager;
|
||||
|
||||
std::unique_ptr<ExecutionEnvironment> executionEnvironment;
|
||||
std::unique_ptr<FrontWindowMemManagerMock> memManager;
|
||||
DebugManagerStateRestore dbgRestorer;
|
||||
};
|
||||
|
||||
using WddmFrontWindowPoolAllocatorTests = Test<WddmMemManagerFixture>;
|
||||
using WddmGlobalBindlessAllocatorTests = Test<GlobalBindlessWddmMemManagerFixture>;
|
||||
|
||||
TEST_F(WddmFrontWindowPoolAllocatorTests, givenAllocateInFrontWindowPoolFlagWhenWddmAllocate32BitGraphicsMemoryThenAllocateAtHeapBegining) {
|
||||
TEST_F(WddmGlobalBindlessAllocatorTests, givenAllocateInFrontWindowPoolFlagWhenWddmAllocate32BitGraphicsMemoryThenAllocateAtHeapBegining) {
|
||||
AllocationData allocData = {};
|
||||
allocData.type = AllocationType::BUFFER;
|
||||
EXPECT_FALSE(GraphicsAllocation::isLockable(allocData.type));
|
||||
@@ -63,4 +68,46 @@ TEST_F(WddmFrontWindowPoolAllocatorTests, givenAllocateInFrontWindowPoolFlagWhen
|
||||
}
|
||||
memManager->freeGraphicsMemory(allocation);
|
||||
}
|
||||
|
||||
// Checks that a LINEAR_STREAM allocation requested through
// allocateGraphicsMemoryInPreferredPool with the use32BitFrontWindow flag set
// reports a GPU base address equal to both the canonized external heap base
// address and the bindless heaps helper's global heaps base.
TEST_F(WddmGlobalBindlessAllocatorTests, givenLocalMemoryWhenSurfaceStatesAllocationCreatedThenGpuBaseAddressIsSetToCorrectBaseAddress) {
|
||||
// 64KB LINEAR_STREAM request; first argument is presumably the root device index - TODO confirm.
MockAllocationProperties properties(0, true, MemoryConstants::pageSize64k, AllocationType::LINEAR_STREAM);
|
||||
properties.flags.use32BitFrontWindow = true;
|
||||
|
||||
// Replace the fixture's memory manager with one built with enable64kbPages = true
// and enableLocalMemory = true (see the FrontWindowMemManagerMock constructor).
memManager.reset(new FrontWindowMemManagerMock(true, true, *executionEnvironment));
|
||||
|
||||
// The helper is created against the new memory manager; meaning of the
// remaining arguments (false, 0, 1) is not visible here - NOTE(review): confirm.
executionEnvironment->rootDeviceEnvironments[0]->createBindlessHeapsHelper(memManager.get(), false, 0, 1);
|
||||
|
||||
auto allocation = memManager->allocateGraphicsMemoryInPreferredPool(properties, nullptr);
|
||||
// ASSERT (not EXPECT): the checks below dereference the allocation.
ASSERT_NE(nullptr, allocation);
|
||||
auto gmmHelper = memManager->getGmmHelper(0);
|
||||
// Second argument 'true' presumably selects the local-memory external heap - TODO confirm.
EXPECT_EQ(gmmHelper->canonize(memManager->getExternalHeapBaseAddress(allocation->getRootDeviceIndex(), true)), allocation->getGpuBaseAddress());
|
||||
|
||||
// ASSERT (not EXPECT): the next check dereferences the helper.
ASSERT_NE(nullptr, executionEnvironment->rootDeviceEnvironments[0]->getBindlessHeapsHelper());
|
||||
EXPECT_EQ(executionEnvironment->rootDeviceEnvironments[0]->getBindlessHeapsHelper()->getGlobalHeapsBase(), allocation->getGpuBaseAddress());
|
||||
|
||||
// Cleanup: free the allocation first, then drop the bindless heaps helper
// that was created with a pointer to memManager.
memManager->freeGraphicsMemory(allocation);
|
||||
executionEnvironment->rootDeviceEnvironments[0]->bindlessHeapsHelper.reset();
|
||||
}
|
||||
|
||||
// Checks that a LINEAR_STREAM allocation created directly through
// allocateGraphicsMemoryInDevicePool reports a GPU base address equal to both
// the canonized external heap base address and the bindless heaps helper's
// global heaps base.
TEST_F(WddmGlobalBindlessAllocatorTests, givenLocalMemoryWhenSurfaceStatesAllocationCreatedInDevicePoolThenGpuBaseAddressIsSetToCorrectBaseAddress) {
|
||||
// Only type and size are set; every other AllocationData field keeps its value-initialized default.
AllocationData allocData = {};
|
||||
allocData.type = AllocationType::LINEAR_STREAM;
|
||||
allocData.size = MemoryConstants::pageSize64k;
|
||||
|
||||
// Replace the fixture's memory manager with one built with enable64kbPages = true
// and enableLocalMemory = true (see the FrontWindowMemManagerMock constructor).
memManager.reset(new FrontWindowMemManagerMock(true, true, *executionEnvironment));
|
||||
|
||||
// The helper is created against the new memory manager; meaning of the
// remaining arguments (false, 0, 1) is not visible here - NOTE(review): confirm.
executionEnvironment->rootDeviceEnvironments[0]->createBindlessHeapsHelper(memManager.get(), false, 0, 1);
|
||||
|
||||
// 'status' is passed to the allocation call but is not inspected by this test.
MemoryManager::AllocationStatus status;
|
||||
auto allocation = memManager->allocateGraphicsMemoryInDevicePool(allocData, status);
|
||||
// ASSERT (not EXPECT): the checks below dereference the allocation.
ASSERT_NE(nullptr, allocation);
|
||||
auto gmmHelper = memManager->getGmmHelper(0);
|
||||
// Second argument 'true' presumably selects the local-memory external heap - TODO confirm.
EXPECT_EQ(gmmHelper->canonize(memManager->getExternalHeapBaseAddress(allocation->getRootDeviceIndex(), true)), allocation->getGpuBaseAddress());
|
||||
|
||||
// ASSERT (not EXPECT): the next check dereferences the helper.
ASSERT_NE(nullptr, executionEnvironment->rootDeviceEnvironments[0]->getBindlessHeapsHelper());
|
||||
EXPECT_EQ(executionEnvironment->rootDeviceEnvironments[0]->getBindlessHeapsHelper()->getGlobalHeapsBase(), allocation->getGpuBaseAddress());
|
||||
|
||||
// Cleanup: free the allocation first, then drop the bindless heaps helper
// that was created with a pointer to memManager.
memManager->freeGraphicsMemory(allocation);
|
||||
executionEnvironment->rootDeviceEnvironments[0]->bindlessHeapsHelper.reset();
|
||||
}
|
||||
} // namespace NEO
|
||||
|
||||
Reference in New Issue
Block a user