Files
compute-runtime/shared/source/helpers/bindless_heaps_helper.cpp
Mateusz Hoppe 8435160db4 feature: bindless addressing for images
- program surface states for redescribed images correctly. Image copies
to/from memory use redescribed surface states,
- refactor state base address programming - program address and size
together, set max size at the beginning due to lack of Enable flag
- set GpuBase in WddmAllocation when external heap is used
- return max ssh required size from kernelInfo or based on stateful args

Related-To: NEO-7063

Signed-off-by: Mateusz Hoppe <mateusz.hoppe@intel.com>
2023-08-18 15:59:20 +02:00

142 lines
6.5 KiB
C++

/*
* Copyright (C) 2020-2023 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "shared/source/helpers/bindless_heaps_helper.h"
#include "shared/source/execution_environment/execution_environment.h"
#include "shared/source/execution_environment/root_device_environment.h"
#include "shared/source/helpers/gfx_core_helper.h"
#include "shared/source/helpers/string.h"
#include "shared/source/indirect_heap/indirect_heap.h"
#include "shared/source/memory_manager/allocation_properties.h"
#include "shared/source/memory_manager/memory_manager.h"
namespace NEO {
// Size in bytes of each replacement allocation requested by growHeap():
// four times MemoryConstants::pageSize64k.
constexpr size_t globalSshAllocationSize = 4 * MemoryConstants::pageSize64k;
// Byte offset, inside the border color allocation, at which the constructor writes the
// border color with alpha = 1. It is the size of one 4-float color rounded up to
// MemoryConstants::cacheLineSize.
constexpr size_t borderColorAlphaOffset = alignUp(4 * sizeof(float), MemoryConstants::cacheLineSize);
// Shorthand for the heap type enum nested in BindlessHeapsHelper.
using BindlesHeapType = BindlessHeapsHelper::BindlesHeapType;
// Builds the helper: creates one IndirectHeap per bindless heap type and prepares the
// border color allocation.
//
// memManager              - memory manager used for every allocation made by this helper;
//                           also queried for the render surface state size of the root device.
// isMultiOsContextCapable - forwarded to the allocation properties of each heap allocation.
// rootDeviceIndex         - root device the allocations are created for.
// deviceBitfield          - forwarded to the allocation properties of each heap allocation.
//
// Any allocation failure here is treated as unrecoverable (UNRECOVERABLE_IF).
BindlessHeapsHelper::BindlessHeapsHelper(MemoryManager *memManager, bool isMultiOsContextCapable,
                                         const uint32_t rootDeviceIndex, DeviceBitfield deviceBitfield) : surfaceStateSize(memManager->peekExecutionEnvironment().rootDeviceEnvironments[rootDeviceIndex]->getHelper<GfxCoreHelper>().getRenderSurfaceStateSize()),
                                                                                                          memManager(memManager),
                                                                                                          isMultiOsContextCapable(isMultiOsContextCapable),
                                                                                                          rootDeviceIndex(rootDeviceIndex),
                                                                                                          deviceBitfield(deviceBitfield) {
    // One heap per heap type, each starting with a single pageSize64k-sized allocation.
    // Every heap except GLOBAL_DSH is requested with the 32-bit front window flag set.
    for (auto heapType = 0; heapType < BindlesHeapType::NUM_HEAP_TYPES; heapType++) {
        auto allocInFrontWindow = heapType != BindlesHeapType::GLOBAL_DSH;
        auto heapAllocation = getHeapAllocation(MemoryConstants::pageSize64k, MemoryConstants::pageSize64k, allocInFrontWindow);
        UNRECOVERABLE_IF(heapAllocation == nullptr);
        ssHeapsAllocations.push_back(heapAllocation);
        surfaceStateHeaps[heapType] = std::make_unique<IndirectHeap>(heapAllocation, true);
    }

    borderColorStates = getHeapAllocation(MemoryConstants::pageSize, MemoryConstants::pageSize, true);
    UNRECOVERABLE_IF(borderColorStates == nullptr);

    // Default border color (all zeros) is placed at the start of the allocation.
    float borderColorDefault[4] = {0.0f, 0.0f, 0.0f, 0.0f};
    memcpy_s(borderColorStates->getUnderlyingBuffer(), sizeof(borderColorDefault), borderColorDefault, sizeof(borderColorDefault));

    // Border color with alpha = 1 is placed at borderColorAlphaOffset. The byte count is
    // taken from the array actually being copied, so the call stays correct if the two
    // arrays ever stop having the same size.
    float borderColorAlpha[4] = {0.0f, 0.0f, 0.0f, 1.0f};
    memcpy_s(ptrOffset(borderColorStates->getUnderlyingBuffer(), borderColorAlphaOffset), sizeof(borderColorAlpha), borderColorAlpha, sizeof(borderColorAlpha));
}
// Releases every graphics allocation this helper obtained: all heap allocations recorded
// in ssHeapsAllocations, followed by the border color allocation.
BindlessHeapsHelper::~BindlessHeapsHelper() {
    for (auto it = ssHeapsAllocations.begin(); it != ssHeapsAllocations.end(); ++it) {
        memManager->freeGraphicsMemory(*it);
    }
    memManager->freeGraphicsMemory(borderColorStates);

    // Drop the now-dangling pointers.
    ssHeapsAllocations.clear();
}
// Requests a LINEAR_STREAM allocation from the memory manager.
//
// heapSize           - requested size in bytes.
// alignment          - value stored in the allocation properties' alignment field.
// allocInFrontWindow - value stored in the use32BitFrontWindow property flag.
//
// Returns the pointer produced by allocateGraphicsMemoryWithProperties(); callers in this
// file treat nullptr as an allocation failure.
GraphicsAllocation *BindlessHeapsHelper::getHeapAllocation(size_t heapSize, size_t alignment, bool allocInFrontWindow) {
    NEO::AllocationProperties heapProperties{rootDeviceIndex, true, heapSize, AllocationType::LINEAR_STREAM, isMultiOsContextCapable, deviceBitfield};
    heapProperties.alignment = alignment;
    heapProperties.flags.use32BitFrontWindow = allocInFrontWindow;

    return memManager->allocateGraphicsMemoryWithProperties(heapProperties);
}
// Hands out a surface state slot of ssSize bytes from the heap of the given type.
//
// For GLOBAL_SSH a previously released slot from the matching reuse vector is returned
// first, if one exists (its memory is returned as-is). Otherwise fresh space is taken
// from the heap, zeroed, and described by its offset relative to the allocation's GPU
// base address.
//
// ssSize            - size in bytes of the slot to allocate.
// surfaceAllocation - not used by this function.
// heapType          - heap to allocate from.
//
// Returns the slot description, or an info with null heapAllocation / ssPtr when no
// space could be obtained. The whole operation runs under this->mtx.
SurfaceStateInHeapInfo BindlessHeapsHelper::allocateSSInHeap(size_t ssSize, GraphicsAllocation *surfaceAllocation, BindlesHeapType heapType) {
    auto *targetHeap = surfaceStateHeaps[heapType].get();
    std::lock_guard<std::mutex> autolock(this->mtx);

    if (heapType == BindlesHeapType::GLOBAL_SSH) {
        const int reuseIndex = getReusedSshVectorIndex(ssSize);
        auto &reusePool = surfaceStateInHeapVectorReuse[reuseIndex];
        if (!reusePool.empty()) {
            SurfaceStateInHeapInfo recycled = reusePool.back();
            reusePool.pop_back();
            return recycled;
        }
    }

    void *slotPtr = getSpaceInHeap(ssSize, heapType);
    if (slotPtr == nullptr) {
        return SurfaceStateInHeapInfo{nullptr, 0, nullptr};
    }

    memset(slotPtr, 0, ssSize);

    // The slot was just carved from the end of the used region, so it starts at
    // (used - ssSize) within the heap's current allocation.
    auto *backingAllocation = targetHeap->getGraphicsAllocation();
    const auto allocationOffsetFromBase = backingAllocation->getGpuAddress() - backingAllocation->getGpuBaseAddress();
    const auto bindlessOffset = allocationOffsetFromBase + targetHeap->getUsed() - ssSize;

    return SurfaceStateInHeapInfo{backingAllocation, bindlessOffset, slotPtr, ssSize};
}
// Reserves ssSize bytes in the heap of the given type, growing the heap first when its
// remaining space is smaller than ssSize.
//
// Returns a pointer to the reserved space, or nullptr when the heap needed to grow and
// growHeap() failed.
void *BindlessHeapsHelper::getSpaceInHeap(size_t ssSize, BindlesHeapType heapType) {
    auto *targetHeap = surfaceStateHeaps[heapType].get();

    const bool needsMoreSpace = targetHeap->getAvailableSpace() < ssSize;
    if (needsMoreSpace && !growHeap(heapType)) {
        return nullptr;
    }

    return targetHeap->getSpace(ssSize);
}
// Returns the GPU base address of the allocation currently backing the GLOBAL_SSH heap.
uint64_t BindlessHeapsHelper::getGlobalHeapsBase() {
    auto *globalSshHeap = surfaceStateHeaps[BindlesHeapType::GLOBAL_SSH].get();
    auto *globalSshAllocation = globalSshHeap->getGraphicsAllocation();
    return globalSshAllocation->getGpuBaseAddress();
}
// Returns the offset of the border color allocation (where the default, all-zero border
// color is stored) from its GPU base address, truncated to 32 bits.
uint32_t BindlessHeapsHelper::getDefaultBorderColorOffset() {
    const auto offsetFromBase = borderColorStates->getGpuAddress() - borderColorStates->getGpuBaseAddress();
    return static_cast<uint32_t>(offsetFromBase);
}
// Returns the offset of the alpha = 1 border color: the default border color offset
// advanced by borderColorAlphaOffset, narrowed to 32 bits.
uint32_t BindlessHeapsHelper::getAlphaBorderColorOffset() {
    const auto alphaOffset = getDefaultBorderColorOffset() + borderColorAlphaOffset;
    return static_cast<uint32_t>(alphaOffset);
}
// Returns a non-owning pointer to the IndirectHeap stored for the given heap type.
IndirectHeap *BindlessHeapsHelper::getHeap(BindlesHeapType heapType) {
    auto &ownedHeap = surfaceStateHeaps[heapType];
    return ownedHeap.get();
}
// Switches the heap of the given type over to a freshly created allocation of
// globalSshAllocationSize bytes. The new allocation is appended to ssHeapsAllocations;
// the previous one stays in that list and is only freed by the destructor.
//
// Returns true on success, false (after DEBUG_BREAK_IF) when the new allocation could
// not be created, in which case the heap is left untouched.
bool BindlessHeapsHelper::growHeap(BindlesHeapType heapType) {
    // Same front-window rule as in the constructor: everything except GLOBAL_DSH.
    const bool useFrontWindow = (heapType != BindlesHeapType::GLOBAL_DSH);

    auto *replacement = getHeapAllocation(globalSshAllocationSize, MemoryConstants::pageSize64k, useFrontWindow);
    DEBUG_BREAK_IF(replacement == nullptr);
    if (!replacement) {
        return false;
    }

    ssHeapsAllocations.push_back(replacement);

    auto *heapToGrow = surfaceStateHeaps[heapType].get();
    heapToGrow->replaceGraphicsAllocation(replacement);
    heapToGrow->replaceBuffer(replacement->getUnderlyingBuffer(), replacement->getUnderlyingBufferSize());
    return true;
}
// Records the surface state slot attached to gfxAllocation in the reuse vector selected
// by its size, so that allocateSSInHeap() can hand it out again. Does nothing when the
// allocation's bindless info has no heap allocation. The vector is modified under
// this->mtx.
void BindlessHeapsHelper::placeSSAllocationInReuseVectorOnFreeMemory(GraphicsAllocation *gfxAllocation) {
    auto releasedInfo = gfxAllocation->getBindlessInfo();
    if (releasedInfo.heapAllocation == nullptr) {
        return;
    }

    std::lock_guard<std::mutex> autolock(this->mtx);
    const int reuseIndex = getReusedSshVectorIndex(releasedInfo.ssSize);
    surfaceStateInHeapVectorReuse[reuseIndex].push_back(std::move(releasedInfo));
}
} // namespace NEO