Mirror of https://github.com/intel/compute-runtime.git, synced 2025-12-26 23:33:20 +08:00
refactor: move surface state programming method to base class
Related-To: NEO-15374
Signed-off-by: Zbigniew Zdanowicz <zbigniew.zdanowicz@intel.com>
Committed by: Compute-Runtime-Automation
Parent: b90b77e4e3
Commit: 5893fb08fb
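The diff below removes the GFX-family-templated KernelHw::setBufferSurfaceState override and adds a single non-virtual KernelImp::setBufferSurfaceState to the base class. The family-specific values the method needs (surface state base address alignment, its mask, and the render surface state size) are now cached as KernelImp members during initialize(), and the final encoding goes through the encodeBufferSurfaceState helper instead of the templated EncodeSurfaceState<GfxFamily>::encodeBuffer. A minimal standalone sketch of that pattern follows; names and values are illustrative, not NEO code:

// Illustrative sketch only: a per-family templated override is replaced by one
// base-class method that reads family-specific constants cached at init time.
#include <cstddef>
#include <cstdint>
#include <cstdio>

struct KernelBaseSketch {
    // Cached once from a per-family helper instead of being baked in via templates.
    uintptr_t surfaceStateAlignment = 0;
    uintptr_t surfaceStateAlignmentMask = 0;
    size_t renderSurfaceStateSize = 0;

    void initialize(uintptr_t alignment, size_t surfaceStateSize) {
        surfaceStateAlignment = alignment;
        surfaceStateAlignmentMask = ~(alignment - 1); // assumes power-of-two alignment, 64-bit build
        renderSurfaceStateSize = surfaceStateSize;
    }

    // One non-virtual implementation replaces N templated overrides.
    void setBufferSurfaceState(uint64_t gpuAddress) const {
        uint64_t base = gpuAddress & surfaceStateAlignmentMask;
        uint64_t misaligned = gpuAddress - base;
        std::printf("base=0x%llx misaligned=%llu ssSize=%zu\n",
                    static_cast<unsigned long long>(base),
                    static_cast<unsigned long long>(misaligned),
                    renderSurfaceStateSize);
    }
};

int main() {
    KernelBaseSketch kernel;
    kernel.initialize(64, 64);            // hypothetical per-family values
    kernel.setBufferSurfaceState(0x1043); // prints base=0x1040 misaligned=3
    return 0;
}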
@@ -29,90 +29,6 @@ namespace L0 {
template <GFXCORE_FAMILY gfxCoreFamily>
struct KernelHw : public KernelImp {
    using KernelImp::KernelImp;
    using GfxFamily = typename NEO::GfxFamilyMapper<gfxCoreFamily>::GfxFamily;

    void setBufferSurfaceState(uint32_t argIndex, void *address, NEO::GraphicsAllocation *alloc) override {
        uint64_t baseAddress = alloc->getGpuAddressToPatch();

        // Remove misaligned bytes, accounted for in bufferOffset patch token
        baseAddress &= this->surfaceStateAlignmentMask;
        auto misalignedSize = ptrDiff(alloc->getGpuAddressToPatch(), baseAddress);
        auto offset = ptrDiff(address, reinterpret_cast<void *>(baseAddress));
        size_t bufferSizeForSsh = alloc->getUnderlyingBufferSize();
        // If the allocation is part of a mapped virtual range, then set size to maximum to allow for access across multiple virtual ranges.
        Device *device = module->getDevice();
        auto allocData = device->getDriverHandle()->getSvmAllocsManager()->getSVMAlloc(reinterpret_cast<void *>(alloc->getGpuAddress()));

        auto argInfo = kernelImmData->getDescriptor().payloadMappings.explicitArgs[argIndex].as<NEO::ArgDescPointer>();
        bool offsetWasPatched = NEO::patchNonPointer<uint32_t, uint32_t>(getCrossThreadDataSpan(),
                                                                         argInfo.bufferOffset, static_cast<uint32_t>(offset));
        bool offsetedAddress = false;
        if (false == offsetWasPatched) {
            // fallback to handling offset in surface state
            offsetedAddress = baseAddress != reinterpret_cast<uintptr_t>(address);
            baseAddress = reinterpret_cast<uintptr_t>(address);
            bufferSizeForSsh -= offset;
            DEBUG_BREAK_IF(baseAddress != (baseAddress & this->surfaceStateAlignmentMask));

            offset = 0;
        }
        void *surfaceStateAddress = nullptr;
        auto surfaceState = GfxFamily::cmdInitRenderSurfaceState;

        if (NEO::isValidOffset(argInfo.bindful)) {
            surfaceStateAddress = ptrOffset(state.surfaceStateHeapData.get(), argInfo.bindful);
            surfaceState = *reinterpret_cast<typename GfxFamily::RENDER_SURFACE_STATE *>(surfaceStateAddress);

        } else if (NEO::isValidOffset(argInfo.bindless)) {
            state.isBindlessOffsetSet[argIndex] = false;
            state.usingSurfaceStateHeap[argIndex] = false;
            if (this->module->getDevice()->getNEODevice()->getBindlessHeapsHelper() && !offsetedAddress) {
                surfaceStateAddress = patchBindlessSurfaceState(alloc, argInfo.bindless);
                state.isBindlessOffsetSet[argIndex] = true;
            } else {
                state.usingSurfaceStateHeap[argIndex] = true;
                surfaceStateAddress = ptrOffset(state.surfaceStateHeapData.get(), getSurfaceStateIndexForBindlessOffset(argInfo.bindless) * sizeof(typename GfxFamily::RENDER_SURFACE_STATE));
            }
        }

        uint64_t bufferAddressForSsh = baseAddress;
        bufferSizeForSsh += misalignedSize;
        bufferSizeForSsh = alignUp(bufferSizeForSsh, this->surfaceStateAlignment);

        bool l3Enabled = true;
        // Allocation MUST be cacheline (64 byte) aligned in order to enable L3 caching otherwise Heap corruption will occur coming from the KMD.
        // Most commonly this issue will occur with Host Point Allocations from customers.
        l3Enabled = isL3Capable(*alloc);

        NEO::Device *neoDevice = device->getNEODevice();

        if (allocData && allocData->allocationFlagsProperty.flags.locallyUncachedResource) {
            l3Enabled = false;
        }

        if (l3Enabled == false) {
            this->state.kernelRequiresQueueUncachedMocsCount++;
        }
        auto isDebuggerActive = neoDevice->getDebugger() != nullptr;
        NEO::EncodeSurfaceStateArgs args;
        args.outMemory = &surfaceState;
        args.graphicsAddress = bufferAddressForSsh;
        if (allocData && allocData->virtualReservationData) {
            bufferSizeForSsh = MemoryConstants::fullStatefulRegion;
        }
        args.size = bufferSizeForSsh;
        args.mocs = device->getMOCS(l3Enabled, false);
        args.numAvailableDevices = neoDevice->getNumGenericSubDevices();
        args.allocation = alloc;
        args.gmmHelper = neoDevice->getGmmHelper();
        args.areMultipleSubDevicesInContext = args.numAvailableDevices > 1;
        args.implicitScaling = device->isImplicitScalingCapable();
        args.isDebuggerActive = isDebuggerActive;

        NEO::EncodeSurfaceState<GfxFamily>::encodeBuffer(args);
        UNRECOVERABLE_IF(surfaceStateAddress == nullptr);
        *reinterpret_cast<typename GfxFamily::RENDER_SURFACE_STATE *>(surfaceStateAddress) = surfaceState;
    }
};

} // namespace L0
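The override above (and the base-class version added later in this diff) programs the surface state at an aligned base address and hands the misaligned bytes to the kernel through the bufferOffset cross-thread-data token. A standalone sketch of that arithmetic with hypothetical addresses, not NEO code:

// Hypothetical values; shows how the aligned base, the patched offset and the
// rounded-up surface size relate to each other.
#include <cstdint>
#include <cstdio>

int main() {
    const uint64_t alignment = 64;             // assumed surface base address alignment
    const uint64_t alignmentMask = ~(alignment - 1);

    const uint64_t allocGpuAddress = 0x10043;  // allocation start, misaligned by 3 bytes
    const uint64_t argAddress = 0x10143;       // pointer passed as the kernel argument
    const uint64_t bufferSize = 0x400;         // underlying buffer size

    const uint64_t baseAddress = allocGpuAddress & alignmentMask;  // 0x10040
    const uint64_t misalignedSize = allocGpuAddress - baseAddress; // 3
    const uint64_t bufferOffset = argAddress - baseAddress;        // 0x103, patched into cross thread data
    const uint64_t sizeForSsh = (bufferSize + misalignedSize + alignment - 1) & alignmentMask; // alignUp

    std::printf("base=0x%llx bufferOffset=0x%llx sshSize=0x%llx\n",
                static_cast<unsigned long long>(baseAddress),
                static_cast<unsigned long long>(bufferOffset),
                static_cast<unsigned long long>(sizeForSsh));
    return 0;
}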
@@ -1083,6 +1083,7 @@ ze_result_t KernelImp::initialize(const ze_kernel_desc_t *desc) {
    this->walkerInlineDataSize = gfxHelper.getDefaultWalkerInlineDataSize();
    this->surfaceStateAlignmentMask = gfxHelper.getSurfaceBaseAddressAlignmentMask();
    this->surfaceStateAlignment = gfxHelper.getSurfaceBaseAddressAlignment();
    this->renderSurfaceStateSize = gfxHelper.getRenderSurfaceStateSize();

    if (isaAllocation->getAllocationType() == NEO::AllocationType::kernelIsaInternal && this->kernelImmData->getIsaParentAllocation() == nullptr) {
        isaAllocation->setTbxWritable(true, std::numeric_limits<uint32_t>::max());
@@ -1662,4 +1663,83 @@ void KernelImp::evaluateIfRequiresGenerationOfLocalIdsByRuntime(const NEO::Kerne
        kernelDescriptor.kernelAttributes.simdSize);
}

void KernelImp::setBufferSurfaceState(uint32_t argIndex, void *address, NEO::GraphicsAllocation *alloc) {
    uint64_t baseAddress = alloc->getGpuAddressToPatch();

    // Remove misaligned bytes, accounted for in bufferOffset patch token
    baseAddress &= this->surfaceStateAlignmentMask;
    auto misalignedSize = ptrDiff(alloc->getGpuAddressToPatch(), baseAddress);
    auto offset = ptrDiff(address, reinterpret_cast<void *>(baseAddress));
    size_t bufferSizeForSsh = alloc->getUnderlyingBufferSize();
    // If the allocation is part of a mapped virtual range, then set size to maximum to allow for access across multiple virtual ranges.
    Device *device = module->getDevice();
    auto allocData = device->getDriverHandle()->getSvmAllocsManager()->getSVMAlloc(reinterpret_cast<void *>(alloc->getGpuAddress()));

    auto argInfo = kernelImmData->getDescriptor().payloadMappings.explicitArgs[argIndex].as<NEO::ArgDescPointer>();
    bool offsetWasPatched = NEO::patchNonPointer<uint32_t, uint32_t>(getCrossThreadDataSpan(),
                                                                     argInfo.bufferOffset, static_cast<uint32_t>(offset));
    bool offsetedAddress = false;
    if (false == offsetWasPatched) {
        // fallback to handling offset in surface state
        offsetedAddress = baseAddress != reinterpret_cast<uintptr_t>(address);
        baseAddress = reinterpret_cast<uintptr_t>(address);
        bufferSizeForSsh -= offset;
        DEBUG_BREAK_IF(baseAddress != (baseAddress & this->surfaceStateAlignmentMask));
    }

    NEO::EncodeSurfaceStateArgs args;
    void *surfaceStateAddress = nullptr;

    if (NEO::isValidOffset(argInfo.bindful)) {
        surfaceStateAddress = ptrOffset(state.surfaceStateHeapData.get(), argInfo.bindful);
        args.inTemplateMemory = surfaceStateAddress;
    } else if (NEO::isValidOffset(argInfo.bindless)) {
        state.isBindlessOffsetSet[argIndex] = false;
        state.usingSurfaceStateHeap[argIndex] = false;
        if (this->module->getDevice()->getNEODevice()->getBindlessHeapsHelper() && !offsetedAddress) {
            surfaceStateAddress = patchBindlessSurfaceState(alloc, argInfo.bindless);
            state.isBindlessOffsetSet[argIndex] = true;
        } else {
            state.usingSurfaceStateHeap[argIndex] = true;
            surfaceStateAddress = ptrOffset(state.surfaceStateHeapData.get(), getSurfaceStateIndexForBindlessOffset(argInfo.bindless) * this->renderSurfaceStateSize);
        }
    }
    args.outMemory = surfaceStateAddress;

    uint64_t bufferAddressForSsh = baseAddress;
    bufferSizeForSsh += misalignedSize;
    bufferSizeForSsh = alignUp(bufferSizeForSsh, this->surfaceStateAlignment);

    bool l3Enabled = true;
    // Allocation MUST be cacheline (64 byte) aligned in order to enable L3 caching otherwise Heap corruption will occur coming from the KMD.
    // Most commonly this issue will occur with Host Point Allocations from customers.
    l3Enabled = isL3Capable(*alloc);

    NEO::Device *neoDevice = device->getNEODevice();

    if (allocData && allocData->allocationFlagsProperty.flags.locallyUncachedResource) {
        l3Enabled = false;
    }

    if (l3Enabled == false) {
        this->state.kernelRequiresQueueUncachedMocsCount++;
    }
    auto isDebuggerActive = neoDevice->getDebugger() != nullptr;

    args.graphicsAddress = bufferAddressForSsh;
    if (allocData && allocData->virtualReservationData) {
        bufferSizeForSsh = MemoryConstants::fullStatefulRegion;
    }
    args.size = bufferSizeForSsh;
    args.mocs = device->getMOCS(l3Enabled, false);
    args.numAvailableDevices = neoDevice->getNumGenericSubDevices();
    args.allocation = alloc;
    args.gmmHelper = neoDevice->getGmmHelper();
    args.areMultipleSubDevicesInContext = args.numAvailableDevices > 1;
    args.implicitScaling = device->isImplicitScalingCapable();
    args.isDebuggerActive = isDebuggerActive;

    device->getGfxCoreHelper().encodeBufferSurfaceState(args);
}

} // namespace L0
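The l3Enabled flag above only stays set for allocations that can safely use L3. Below is a minimal sketch of that gating, assuming isL3Capable reduces to cacheline (64 byte) alignment of the GPU address and size (the real helper lives in shared NEO code and may check more) and modelling the locallyUncachedResource flag as a plain bool:

// Sketch of the MOCS selection: cacheline-aligned allocations may use a cached
// MOCS, explicitly uncached allocations never do. Names are illustrative.
#include <cstdint>
#include <cstdio>

constexpr uint64_t cachelineSize = 64;

bool isCachelineAligned(uint64_t gpuAddress, uint64_t size) {
    return (gpuAddress % cachelineSize == 0) && (size % cachelineSize == 0);
}

uint32_t selectMocs(uint64_t gpuAddress, uint64_t size, bool locallyUncachedResource,
                    uint32_t cachedMocs, uint32_t uncachedMocs) {
    bool l3Enabled = isCachelineAligned(gpuAddress, size); // stands in for isL3Capable(*alloc)
    if (locallyUncachedResource) {
        l3Enabled = false;                                  // explicit uncached flag always wins
    }
    return l3Enabled ? cachedMocs : uncachedMocs;           // stands in for device->getMOCS(l3Enabled, false)
}

int main() {
    std::printf("aligned allocation    -> mocs %u\n", selectMocs(0x10000, 0x1000, false, 2, 1));
    std::printf("misaligned allocation -> mocs %u\n", selectMocs(0x10020, 0x1000, false, 2, 1));
    return 0;
}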
@@ -169,7 +169,7 @@ struct KernelImp : Kernel {

    ze_result_t setArgSampler(uint32_t argIndex, size_t argSize, const void *argVal);

    virtual void setBufferSurfaceState(uint32_t argIndex, void *address, NEO::GraphicsAllocation *alloc) = 0;
    MOCKABLE_VIRTUAL void setBufferSurfaceState(uint32_t argIndex, void *address, NEO::GraphicsAllocation *alloc);

    void setInlineSamplers();
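This hunk turns the pure-virtual declaration into a MOCKABLE_VIRTUAL one, so KernelImp can carry the single implementation while unit tests can still substitute a mock. An illustrative definition of that idiom (not NEO's actual macro) is:

// Illustrative only: the method is virtual in test builds so mocks can override
// it, and a plain member function in release builds, avoiding vtable overhead.
#include <cstdint>

#if defined(UNIT_TESTS)
#define MOCKABLE_VIRTUAL virtual
#else
#define MOCKABLE_VIRTUAL
#endif

struct KernelImpSketch {
    MOCKABLE_VIRTUAL void setBufferSurfaceState(uint32_t argIndex, void *address, void *alloc) {
        // single base-class implementation
        (void)argIndex;
        (void)address;
        (void)alloc;
    }
};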
@@ -325,6 +325,7 @@ struct KernelImp : Kernel {
    NEO::GraphicsAllocation *printfBuffer = nullptr;
    uintptr_t surfaceStateAlignmentMask = 0;
    uintptr_t surfaceStateAlignment = 0;
    size_t renderSurfaceStateSize = 0;

    uint32_t implicitArgsVersion = 0;
    uint32_t walkerInlineDataSize = 0;