fix: allow kernel access across multiple virtual regions

Related to: NEO-8350

Signed-off-by: John Falkowski <john.falkowski@intel.com>
This commit is contained in:
John Falkowski
2024-04-23 07:14:31 +00:00
committed by Compute-Runtime-Automation
parent b499973658
commit b9c1ef65dd
3 changed files with 7 additions and 25 deletions

View File

@@ -39,22 +39,11 @@ struct KernelHw : public KernelImp {
auto misalignedSize = ptrDiff(alloc->getGpuAddressToPatch(), baseAddress);
auto offset = ptrDiff(address, reinterpret_cast<void *>(baseAddress));
size_t bufferSizeForSsh = alloc->getUnderlyingBufferSize();
// If the allocation is part of a mapped virtual range, then check to see if the buffer size needs to be extended to include more physical buffers.
// If the allocation is part of a mapped virtual range, set the surface state size to the full stateful region so the kernel can access memory across multiple mapped virtual ranges.
Device *device = module->getDevice();
auto allocData = device->getDriverHandle()->getSvmAllocsManager()->getSVMAlloc(reinterpret_cast<void *>(alloc->getGpuAddress()));
if (allocData && allocData->virtualReservationData) {
size_t calcBufferSizeForSsh = bufferSizeForSsh;
for (const auto &mappedAllocationData : allocData->virtualReservationData->mappedAllocations) {
// Add the buffer size of each additional allocation located after this starting address, so the programmed size allows full usage of the memory range.
if (address != mappedAllocationData.second->ptr && mappedAllocationData.second->ptr > address) {
calcBufferSizeForSsh += mappedAllocationData.second->mappedAllocation->allocation->getUnderlyingBufferSize();
// Only allow for the surface state to be extended up to 4GB in size.
bufferSizeForSsh = std::min(calcBufferSizeForSsh, MemoryConstants::gigaByte * 4);
if (bufferSizeForSsh == MemoryConstants::gigaByte * 4) {
break;
}
}
}
bufferSizeForSsh = MemoryConstants::fullStatefulRegion;
}
auto argInfo = kernelImmData->getDescriptor().payloadMappings.explicitArgs[argIndex].as<NEO::ArgDescPointer>();
bool offsetWasPatched = NEO::patchNonPointer<uint32_t, uint32_t>(ArrayRef<uint8_t>(this->crossThreadData.get(), this->crossThreadDataSize),

View File

@@ -2510,23 +2510,19 @@ HWTEST_F(MultiDeviceModuleSetArgBufferTest,
bool phys1Resident = false;
bool phys2Resident = false;
NEO::GraphicsAllocation *baseAlloc = nullptr;
NEO::GraphicsAllocation *offsetAlloc = nullptr;
for (auto alloc : kernel->getResidencyContainer()) {
if (alloc && alloc->getGpuAddress() == reinterpret_cast<uint64_t>(ptr)) {
phys1Resident = true;
baseAlloc = alloc;
}
if (alloc && alloc->getGpuAddress() == reinterpret_cast<uint64_t>(offsetAddress)) {
phys2Resident = true;
offsetAlloc = alloc;
}
}
auto argInfo = kernel->getImmutableData()->getDescriptor().payloadMappings.explicitArgs[0].as<NEO::ArgDescPointer>();
auto surfaceStateAddressRaw = ptrOffset(kernel->getSurfaceStateHeapData(), argInfo.bindful);
auto surfaceStateAddress = reinterpret_cast<RENDER_SURFACE_STATE *>(const_cast<unsigned char *>(surfaceStateAddressRaw));
SurfaceStateBufferLength length = {0};
length.length = static_cast<uint32_t>((baseAlloc->getUnderlyingBufferSize() + offsetAlloc->getUnderlyingBufferSize()) - 1);
length.length = static_cast<uint32_t>((MemoryConstants::fullStatefulRegion)-1);
EXPECT_EQ(surfaceStateAddress->getWidth(), static_cast<uint32_t>(length.surfaceState.width + 1));
EXPECT_EQ(surfaceStateAddress->getHeight(), static_cast<uint32_t>(length.surfaceState.height + 1));
EXPECT_EQ(surfaceStateAddress->getDepth(), static_cast<uint32_t>(length.surfaceState.depth + 1));
@@ -2584,21 +2580,19 @@ HWTEST_F(MultiDeviceModuleSetArgBufferTest,
bool phys1Resident = false;
bool phys2Resident = false;
NEO::GraphicsAllocation *offsetAlloc = nullptr;
for (auto alloc : kernel->getResidencyContainer()) {
if (alloc && alloc->getGpuAddress() == reinterpret_cast<uint64_t>(ptr)) {
phys1Resident = true;
}
if (alloc && alloc->getGpuAddress() == reinterpret_cast<uint64_t>(offsetAddress)) {
phys2Resident = true;
offsetAlloc = alloc;
}
}
auto argInfo = kernel->getImmutableData()->getDescriptor().payloadMappings.explicitArgs[0].as<NEO::ArgDescPointer>();
auto surfaceStateAddressRaw = ptrOffset(kernel->getSurfaceStateHeapData(), argInfo.bindful);
auto surfaceStateAddress = reinterpret_cast<RENDER_SURFACE_STATE *>(const_cast<unsigned char *>(surfaceStateAddressRaw));
SurfaceStateBufferLength length = {0};
length.length = static_cast<uint32_t>(offsetAlloc->getUnderlyingBufferSize() - 1);
length.length = static_cast<uint32_t>((MemoryConstants::fullStatefulRegion)-1);
EXPECT_EQ(surfaceStateAddress->getWidth(), static_cast<uint32_t>(length.surfaceState.width + 1));
EXPECT_EQ(surfaceStateAddress->getHeight(), static_cast<uint32_t>(length.surfaceState.height + 1));
EXPECT_EQ(surfaceStateAddress->getDepth(), static_cast<uint32_t>(length.surfaceState.depth + 1));
@@ -2649,18 +2643,16 @@ HWTEST_F(MultiDeviceModuleSetArgBufferTest,
kernel->setArgBuffer(0, sizeof(ptr), &ptr);
bool phys1Resident = false;
NEO::GraphicsAllocation *baseAlloc = nullptr;
for (auto alloc : kernel->getResidencyContainer()) {
if (alloc && alloc->getGpuAddress() == reinterpret_cast<uint64_t>(ptr)) {
phys1Resident = true;
baseAlloc = alloc;
}
}
auto argInfo = kernel->getImmutableData()->getDescriptor().payloadMappings.explicitArgs[0].as<NEO::ArgDescPointer>();
auto surfaceStateAddressRaw = ptrOffset(kernel->getSurfaceStateHeapData(), argInfo.bindful);
auto surfaceStateAddress = reinterpret_cast<RENDER_SURFACE_STATE *>(const_cast<unsigned char *>(surfaceStateAddressRaw));
SurfaceStateBufferLength length = {0};
length.length = static_cast<uint32_t>(baseAlloc->getUnderlyingBufferSize() - 1);
length.length = static_cast<uint32_t>((MemoryConstants::fullStatefulRegion)-1);
EXPECT_EQ(surfaceStateAddress->getWidth(), static_cast<uint32_t>(length.surfaceState.width + 1));
EXPECT_EQ(surfaceStateAddress->getHeight(), static_cast<uint32_t>(length.surfaceState.height + 1));
EXPECT_EQ(surfaceStateAddress->getDepth(), static_cast<uint32_t>(length.surfaceState.depth + 1));