mirror of
https://github.com/intel/compute-runtime.git
synced 2026-01-03 23:03:02 +08:00
fix: allow kernel access across multiple virtual regions
Related to: NEO-8350
Signed-off-by: John Falkowski <john.falkowski@intel.com>
This commit is contained in:
committed by
Compute-Runtime-Automation
parent
b499973658
commit
b9c1ef65dd
@@ -39,22 +39,11 @@ struct KernelHw : public KernelImp {
|
||||
auto misalignedSize = ptrDiff(alloc->getGpuAddressToPatch(), baseAddress);
|
||||
auto offset = ptrDiff(address, reinterpret_cast<void *>(baseAddress));
|
||||
size_t bufferSizeForSsh = alloc->getUnderlyingBufferSize();
|
||||
// If the allocation is part of a mapped virtual range, then check to see if the buffer size needs to be extended to include more physical buffers.
|
||||
// If the allocation is part of a mapped virtual range, then set size to maximum to allow for access across multiple virtual ranges.
|
||||
Device *device = module->getDevice();
|
||||
auto allocData = device->getDriverHandle()->getSvmAllocsManager()->getSVMAlloc(reinterpret_cast<void *>(alloc->getGpuAddress()));
|
||||
if (allocData && allocData->virtualReservationData) {
|
||||
size_t calcBufferSizeForSsh = bufferSizeForSsh;
|
||||
for (const auto &mappedAllocationData : allocData->virtualReservationData->mappedAllocations) {
|
||||
// Add additional allocations buffer size to be programmed to allow full usage of the memory range if the allocation is after this starting address.
|
||||
if (address != mappedAllocationData.second->ptr && mappedAllocationData.second->ptr > address) {
|
||||
calcBufferSizeForSsh += mappedAllocationData.second->mappedAllocation->allocation->getUnderlyingBufferSize();
|
||||
// Only allow for the surface state to be extended up to 4GB in size.
|
||||
bufferSizeForSsh = std::min(calcBufferSizeForSsh, MemoryConstants::gigaByte * 4);
|
||||
if (bufferSizeForSsh == MemoryConstants::gigaByte * 4) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
bufferSizeForSsh = MemoryConstants::fullStatefulRegion;
|
||||
}
|
||||
auto argInfo = kernelImmData->getDescriptor().payloadMappings.explicitArgs[argIndex].as<NEO::ArgDescPointer>();
|
||||
bool offsetWasPatched = NEO::patchNonPointer<uint32_t, uint32_t>(ArrayRef<uint8_t>(this->crossThreadData.get(), this->crossThreadDataSize),
|
||||
|
||||
@@ -2510,23 +2510,19 @@ HWTEST_F(MultiDeviceModuleSetArgBufferTest,
|
||||
|
||||
bool phys1Resident = false;
|
||||
bool phys2Resident = false;
|
||||
NEO::GraphicsAllocation *baseAlloc = nullptr;
|
||||
NEO::GraphicsAllocation *offsetAlloc = nullptr;
|
||||
for (auto alloc : kernel->getResidencyContainer()) {
|
||||
if (alloc && alloc->getGpuAddress() == reinterpret_cast<uint64_t>(ptr)) {
|
||||
phys1Resident = true;
|
||||
baseAlloc = alloc;
|
||||
}
|
||||
if (alloc && alloc->getGpuAddress() == reinterpret_cast<uint64_t>(offsetAddress)) {
|
||||
phys2Resident = true;
|
||||
offsetAlloc = alloc;
|
||||
}
|
||||
}
|
||||
auto argInfo = kernel->getImmutableData()->getDescriptor().payloadMappings.explicitArgs[0].as<NEO::ArgDescPointer>();
|
||||
auto surfaceStateAddressRaw = ptrOffset(kernel->getSurfaceStateHeapData(), argInfo.bindful);
|
||||
auto surfaceStateAddress = reinterpret_cast<RENDER_SURFACE_STATE *>(const_cast<unsigned char *>(surfaceStateAddressRaw));
|
||||
SurfaceStateBufferLength length = {0};
|
||||
length.length = static_cast<uint32_t>((baseAlloc->getUnderlyingBufferSize() + offsetAlloc->getUnderlyingBufferSize()) - 1);
|
||||
length.length = static_cast<uint32_t>((MemoryConstants::fullStatefulRegion)-1);
|
||||
EXPECT_EQ(surfaceStateAddress->getWidth(), static_cast<uint32_t>(length.surfaceState.width + 1));
|
||||
EXPECT_EQ(surfaceStateAddress->getHeight(), static_cast<uint32_t>(length.surfaceState.height + 1));
|
||||
EXPECT_EQ(surfaceStateAddress->getDepth(), static_cast<uint32_t>(length.surfaceState.depth + 1));
|
||||
@@ -2584,21 +2580,19 @@ HWTEST_F(MultiDeviceModuleSetArgBufferTest,
|
||||
|
||||
bool phys1Resident = false;
|
||||
bool phys2Resident = false;
|
||||
NEO::GraphicsAllocation *offsetAlloc = nullptr;
|
||||
for (auto alloc : kernel->getResidencyContainer()) {
|
||||
if (alloc && alloc->getGpuAddress() == reinterpret_cast<uint64_t>(ptr)) {
|
||||
phys1Resident = true;
|
||||
}
|
||||
if (alloc && alloc->getGpuAddress() == reinterpret_cast<uint64_t>(offsetAddress)) {
|
||||
phys2Resident = true;
|
||||
offsetAlloc = alloc;
|
||||
}
|
||||
}
|
||||
auto argInfo = kernel->getImmutableData()->getDescriptor().payloadMappings.explicitArgs[0].as<NEO::ArgDescPointer>();
|
||||
auto surfaceStateAddressRaw = ptrOffset(kernel->getSurfaceStateHeapData(), argInfo.bindful);
|
||||
auto surfaceStateAddress = reinterpret_cast<RENDER_SURFACE_STATE *>(const_cast<unsigned char *>(surfaceStateAddressRaw));
|
||||
SurfaceStateBufferLength length = {0};
|
||||
length.length = static_cast<uint32_t>(offsetAlloc->getUnderlyingBufferSize() - 1);
|
||||
length.length = static_cast<uint32_t>((MemoryConstants::fullStatefulRegion)-1);
|
||||
EXPECT_EQ(surfaceStateAddress->getWidth(), static_cast<uint32_t>(length.surfaceState.width + 1));
|
||||
EXPECT_EQ(surfaceStateAddress->getHeight(), static_cast<uint32_t>(length.surfaceState.height + 1));
|
||||
EXPECT_EQ(surfaceStateAddress->getDepth(), static_cast<uint32_t>(length.surfaceState.depth + 1));
|
||||
@@ -2649,18 +2643,16 @@ HWTEST_F(MultiDeviceModuleSetArgBufferTest,
|
||||
kernel->setArgBuffer(0, sizeof(ptr), &ptr);
|
||||
|
||||
bool phys1Resident = false;
|
||||
NEO::GraphicsAllocation *baseAlloc = nullptr;
|
||||
for (auto alloc : kernel->getResidencyContainer()) {
|
||||
if (alloc && alloc->getGpuAddress() == reinterpret_cast<uint64_t>(ptr)) {
|
||||
phys1Resident = true;
|
||||
baseAlloc = alloc;
|
||||
}
|
||||
}
|
||||
auto argInfo = kernel->getImmutableData()->getDescriptor().payloadMappings.explicitArgs[0].as<NEO::ArgDescPointer>();
|
||||
auto surfaceStateAddressRaw = ptrOffset(kernel->getSurfaceStateHeapData(), argInfo.bindful);
|
||||
auto surfaceStateAddress = reinterpret_cast<RENDER_SURFACE_STATE *>(const_cast<unsigned char *>(surfaceStateAddressRaw));
|
||||
SurfaceStateBufferLength length = {0};
|
||||
length.length = static_cast<uint32_t>(baseAlloc->getUnderlyingBufferSize() - 1);
|
||||
length.length = static_cast<uint32_t>((MemoryConstants::fullStatefulRegion)-1);
|
||||
EXPECT_EQ(surfaceStateAddress->getWidth(), static_cast<uint32_t>(length.surfaceState.width + 1));
|
||||
EXPECT_EQ(surfaceStateAddress->getHeight(), static_cast<uint32_t>(length.surfaceState.height + 1));
|
||||
EXPECT_EQ(surfaceStateAddress->getDepth(), static_cast<uint32_t>(length.surfaceState.depth + 1));
|
||||
|
||||
Reference in New Issue
Block a user