compute-runtime/shared/source/command_stream/scratch_space_controller_xe...

213 lines
12 KiB
C++

/*
* Copyright (C) 2021 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "shared/source/command_stream/scratch_space_controller_xehp_plus.h"
#include "shared/source/command_stream/command_stream_receiver.h"
#include "shared/source/debug_settings/debug_settings_manager.h"
#include "shared/source/execution_environment/execution_environment.h"
#include "shared/source/execution_environment/root_device_environment.h"
#include "shared/source/helpers/aligned_memory.h"
#include "shared/source/helpers/api_specific_config.h"
#include "shared/source/helpers/constants.h"
#include "shared/source/helpers/hw_helper.h"
#include "shared/source/memory_manager/graphics_allocation.h"
#include "shared/source/memory_manager/internal_allocation_storage.h"
#include "shared/source/memory_manager/memory_manager.h"
#include "shared/source/os_interface/os_context.h"
namespace NEO {
// Constructs the controller: records the size of one render surface state as
// reported by the HW helper for this root device, and decides whether the
// second (private) scratch surface state is in use.
ScratchSpaceControllerXeHPPlus::ScratchSpaceControllerXeHPPlus(uint32_t rootDeviceIndex,
                                                               ExecutionEnvironment &environment,
                                                               InternalAllocationStorage &allocationStorage)
    : ScratchSpaceController(rootDeviceIndex, environment, allocationStorage) {
    const auto *hardwareInfo = environment.rootDeviceEnvironments[rootDeviceIndex]->getHardwareInfo();
    singleSurfaceStateSize = HwHelper::get(hardwareInfo->platform.eRenderCoreFamily).getRenderSurfaceStateSize();

    // A flag value of -1 leaves privateScratchSpaceSupported at its current
    // value; any other value is treated as a boolean override.
    const auto privateScratchOverride = DebugManager.flags.EnablePrivateScratchSlot1.get();
    if (privateScratchOverride != -1) {
        privateScratchSpaceSupported = (privateScratchOverride != 0);
    }

    // The number of state slots is doubled when private scratch is in use.
    if (privateScratchSpaceSupported) {
        stateSlotsCount *= 2;
    }
}
// Switches the controller to a new surface state heap base pointer.
//
// Nothing happens (and cfeDirty is left untouched) when newSsh equals the
// pointer already tracked. Otherwise the pointer is stored and:
//  - without a scratch allocation, cfeDirty is set to false;
//  - with a scratch allocation, slotId is optionally reset to 0 (changeId),
//    the surface state is programmed into the new heap and cfeDirty is set
//    to true.
void ScratchSpaceControllerXeHPPlus::setNewSshPtr(void *newSsh, bool &cfeDirty, bool changeId) {
    if (surfaceStateHeap == newSsh) {
        return;
    }
    surfaceStateHeap = static_cast<char *>(newSsh);

    if (scratchAllocation == nullptr) {
        cfeDirty = false;
        return;
    }

    if (changeId) {
        slotId = 0;
    }
    programSurfaceState();
    cfeDirty = true;
}
// Makes sure scratch space of the requested per-thread sizes is available and
// that its surface state is programmed in the heap at sshBaseAddress.
//
// The heap pointer is updated first; the slot id is reset only when offset is
// 0. If prepareScratchAllocation reports that a scratch surface was
// (re)allocated, vfeStateDirty is raised, slot advancing is enabled and the
// surface state is programmed again.
void ScratchSpaceControllerXeHPPlus::setRequiredScratchSpace(void *sshBaseAddress,
                                                             uint32_t offset,
                                                             uint32_t requiredPerThreadScratchSize,
                                                             uint32_t requiredPerThreadPrivateScratchSize,
                                                             uint32_t currentTaskCount,
                                                             OsContext &osContext,
                                                             bool &stateBaseAddressDirty,
                                                             bool &vfeStateDirty) {
    const bool resetSlotId = (offset == 0);
    setNewSshPtr(sshBaseAddress, vfeStateDirty, resetSlotId);

    // Overwritten by prepareScratchAllocation before it is read.
    bool scratchSurfaceDirty = false;
    prepareScratchAllocation(requiredPerThreadScratchSize, requiredPerThreadPrivateScratchSize, currentTaskCount, osContext, stateBaseAddressDirty, scratchSurfaceDirty, vfeStateDirty);
    if (!scratchSurfaceDirty) {
        return;
    }

    vfeStateDirty = true;
    updateSlots = true;
    programSurfaceState();
}
// Programs the scratch surface state(s) into the currently tracked surface
// state heap at the slot selected by slotId + sshOffset.
//
// When updateSlots is set, slotId is advanced first. Aborts via
// UNRECOVERABLE_IF if the slot id is out of range or if neither scratch
// allocation exists.
void ScratchSpaceControllerXeHPPlus::programSurfaceState() {
    if (updateSlots) {
        ++slotId;
    }
    UNRECOVERABLE_IF(slotId >= stateSlotsCount);
    UNRECOVERABLE_IF(scratchAllocation == nullptr && privateScratchAllocation == nullptr);

    const auto surfaceStateOffset = getOffsetToSurfaceState(slotId + sshOffset);
    void *heapBase = static_cast<void *>(surfaceStateHeap);
    programSurfaceStateAtPtr(ptrOffset(heapBase, surfaceStateOffset));
}
void ScratchSpaceControllerXeHPPlus::programSurfaceStateAtPtr(void *surfaceStateForScratchAllocation) {
auto &hwHelper = HwHelper::get(executionEnvironment.rootDeviceEnvironments[rootDeviceIndex]->getHardwareInfo()->platform.eRenderCoreFamily);
uint64_t scratchAllocationAddress = 0u;
if (scratchAllocation) {
scratchAllocationAddress = scratchAllocation->getGpuAddress();
}
hwHelper.setRenderSurfaceStateForBuffer(*executionEnvironment.rootDeviceEnvironments[rootDeviceIndex],
surfaceStateForScratchAllocation, computeUnitsUsedForScratch, scratchAllocationAddress, 0,
perThreadScratchSize, nullptr, false, scratchType, false, true);
if (privateScratchSpaceSupported) {
void *surfaceStateForPrivateScratchAllocation = ptrOffset(surfaceStateForScratchAllocation, singleSurfaceStateSize);
uint64_t privateScratchAllocationAddress = 0u;
if (privateScratchAllocation) {
privateScratchAllocationAddress = privateScratchAllocation->getGpuAddress();
}
hwHelper.setRenderSurfaceStateForBuffer(*executionEnvironment.rootDeviceEnvironments[rootDeviceIndex],
surfaceStateForPrivateScratchAllocation, computeUnitsUsedForScratch,
privateScratchAllocationAddress, 0, perThreadPrivateScratchSize, nullptr, false,
scratchType, false, true);
}
}
// Always returns 0 for this controller.
uint64_t ScratchSpaceControllerXeHPPlus::calculateNewGSH() {
    constexpr uint64_t newGsh = 0u;
    return newGsh;
}
uint64_t ScratchSpaceControllerXeHPPlus::getScratchPatchAddress() {
uint64_t scratchAddress = 0u;
if (scratchAllocation || privateScratchAllocation) {
if (ApiSpecificConfig::getBindlessConfiguration()) {
scratchAddress = bindlessSS.surfaceStateOffset;
} else {
scratchAddress = static_cast<uint64_t>(getOffsetToSurfaceState(slotId + sshOffset));
}
}
return scratchAddress;
}
// Converts a slot count into a byte offset: one surface state per slot, or
// two per slot when private scratch is supported.
size_t ScratchSpaceControllerXeHPPlus::getOffsetToSurfaceState(uint32_t requiredSlotCount) const {
    const auto singleStateOffset = requiredSlotCount * singleSurfaceStateSize;
    return privateScratchSpaceSupported ? singleStateOffset * 2 : singleStateOffset;
}
// Takes space for all stateSlotsCount scratch slots from the given heap.
// Only surface state heaps are affected; other heap types are ignored.
void ScratchSpaceControllerXeHPPlus::reserveHeap(IndirectHeap::Type heapType, IndirectHeap *&indirectHeap) {
    if (heapType != IndirectHeap::SURFACE_STATE) {
        return;
    }
    const auto reservedBytes = getOffsetToSurfaceState(stateSlotsCount);
    indirectHeap->getSpace(reservedBytes);
}
// Bindless variant of scratch programming.
//
// Ensures scratch allocations of the requested per-thread sizes exist. When a
// scratch surface was (re)allocated, a surface state region (one state, or two
// when private scratch is supported) is taken from the bindless heaps helper,
// programmed, and vfeStateDirty is raised. Finally the heap allocation that
// backs the bindless surface state is made resident on csr.
void ScratchSpaceControllerXeHPPlus::programBindlessSurfaceStateForScratch(BindlessHeapsHelper *heapsHelper,
                                                                           uint32_t requiredPerThreadScratchSize,
                                                                           uint32_t requiredPerThreadPrivateScratchSize,
                                                                           uint32_t currentTaskCount,
                                                                           OsContext &osContext,
                                                                           bool &stateBaseAddressDirty,
                                                                           bool &vfeStateDirty,
                                                                           NEO::CommandStreamReceiver *csr) {
    bool scratchSurfaceDirty = false;
    prepareScratchAllocation(requiredPerThreadScratchSize, requiredPerThreadPrivateScratchSize, currentTaskCount, osContext, stateBaseAddressDirty, scratchSurfaceDirty, vfeStateDirty);
    if (scratchSurfaceDirty) {
        bindlessSS = heapsHelper->allocateSSInHeap(singleSurfaceStateSize * (privateScratchSpaceSupported ? 2 : 1), scratchAllocation, BindlessHeapsHelper::SCRATCH_SSH);
        programSurfaceStateAtPtr(bindlessSS.ssPtr);
        vfeStateDirty = true;
    }
    // If no scratch has been required so far, bindlessSS was never filled in
    // and there is no heap allocation to make resident; dereferencing it
    // unconditionally would be a null-pointer dereference.
    if (bindlessSS.heapAllocation != nullptr) {
        csr->makeResident(*bindlessSS.heapAllocation);
    }
}
void ScratchSpaceControllerXeHPPlus::prepareScratchAllocation(uint32_t requiredPerThreadScratchSize,
uint32_t requiredPerThreadPrivateScratchSize,
uint32_t currentTaskCount,
OsContext &osContext,
bool &stateBaseAddressDirty,
bool &scratchSurfaceDirty,
bool &vfeStateDirty) {
uint32_t requiredPerThreadScratchSizeAlignedUp = alignUp(requiredPerThreadScratchSize, 64);
size_t requiredScratchSizeInBytes = requiredPerThreadScratchSizeAlignedUp * computeUnitsUsedForScratch;
scratchSurfaceDirty = false;
auto multiTileCapable = osContext.getNumSupportedDevices() > 1;
if (scratchSizeBytes < requiredScratchSizeInBytes) {
if (scratchAllocation) {
scratchAllocation->updateTaskCount(currentTaskCount, osContext.getContextId());
csrAllocationStorage.storeAllocation(std::unique_ptr<GraphicsAllocation>(scratchAllocation), TEMPORARY_ALLOCATION);
}
scratchSurfaceDirty = true;
scratchSizeBytes = requiredScratchSizeInBytes;
perThreadScratchSize = requiredPerThreadScratchSizeAlignedUp;
AllocationProperties properties{this->rootDeviceIndex, true, scratchSizeBytes, GraphicsAllocation::AllocationType::SCRATCH_SURFACE, multiTileCapable, false, osContext.getDeviceBitfield()};
scratchAllocation = getMemoryManager()->allocateGraphicsMemoryWithProperties(properties);
}
if (privateScratchSpaceSupported) {
uint32_t requiredPerThreadPrivateScratchSizeAlignedUp = alignUp(requiredPerThreadPrivateScratchSize, 64);
size_t requiredPrivateScratchSizeInBytes = requiredPerThreadPrivateScratchSizeAlignedUp * computeUnitsUsedForScratch;
if (privateScratchSizeBytes < requiredPrivateScratchSizeInBytes) {
if (privateScratchAllocation) {
privateScratchAllocation->updateTaskCount(currentTaskCount, osContext.getContextId());
csrAllocationStorage.storeAllocation(std::unique_ptr<GraphicsAllocation>(privateScratchAllocation), TEMPORARY_ALLOCATION);
}
privateScratchSizeBytes = requiredPrivateScratchSizeInBytes;
perThreadPrivateScratchSize = requiredPerThreadPrivateScratchSizeAlignedUp;
scratchSurfaceDirty = true;
AllocationProperties properties{this->rootDeviceIndex, true, privateScratchSizeBytes, GraphicsAllocation::AllocationType::PRIVATE_SURFACE, multiTileCapable, false, osContext.getDeviceBitfield()};
privateScratchAllocation = getMemoryManager()->allocateGraphicsMemoryWithProperties(properties);
}
}
}
// Programs the scratch surface state into every heap of heapContainer at the
// slot given by scratchSlot.
//
// The first heap goes through setRequiredScratchSpace, which may also
// (re)allocate the scratch surfaces. Each remaining heap is then programmed
// with slot advancing disabled. Slot advancing is re-enabled before
// returning.
void ScratchSpaceControllerXeHPPlus::programHeaps(HeapContainer &heapContainer,
                                                  uint32_t scratchSlot,
                                                  uint32_t requiredPerThreadScratchSize,
                                                  uint32_t requiredPerThreadPrivateScratchSize,
                                                  uint32_t currentTaskCount,
                                                  OsContext &osContext,
                                                  bool &stateBaseAddressDirty,
                                                  bool &vfeStateDirty) {
    sshOffset = scratchSlot;
    updateSlots = false;

    void *firstHeapBuffer = heapContainer[0]->getUnderlyingBuffer();
    setRequiredScratchSpace(firstHeapBuffer, sshOffset, requiredPerThreadScratchSize, requiredPerThreadPrivateScratchSize, currentTaskCount, osContext, stateBaseAddressDirty, vfeStateDirty);

    const auto heapCount = heapContainer.size();
    for (size_t heapIndex = 1; heapIndex < heapCount; ++heapIndex) {
        // setRequiredScratchSpace may have turned slot advancing back on.
        updateSlots = false;
        surfaceStateHeap = static_cast<char *>(heapContainer[heapIndex]->getUnderlyingBuffer());
        programSurfaceState();
    }

    updateSlots = true;
}
} // namespace NEO