mirror of
https://github.com/intel/compute-runtime.git
synced 2025-12-21 09:14:47 +08:00
Encode dispatch kernel with global bindless heaps
Signed-off-by: Maciej Plewka <maciej.plewka@intel.com>
This commit is contained in:
committed by
Compute-Runtime-Automation
parent
be90b9ff93
commit
7a5c9d39b5
@@ -21,6 +21,7 @@ namespace NEO {
|
||||
class GmmHelper;
|
||||
struct HardwareInfo;
|
||||
class IndirectHeap;
|
||||
class BindlessHeapsHelper;
|
||||
|
||||
template <typename GfxFamily>
|
||||
struct EncodeDispatchKernel {
|
||||
@@ -46,8 +47,6 @@ struct EncodeDispatchKernel {
|
||||
|
||||
static size_t estimateEncodeDispatchKernelCmdsSize(Device *device);
|
||||
|
||||
static void patchBindlessSurfaceStateOffsets(const size_t sshOffset, const KernelDescriptor &kernelDesc, uint8_t *crossThread);
|
||||
|
||||
static bool isRuntimeLocalIdsGenerationRequired(uint32_t activeChannels,
|
||||
size_t *lws,
|
||||
std::array<uint8_t, 3> walkOrder,
|
||||
@@ -84,6 +83,7 @@ struct EncodeStates {
|
||||
using BINDING_TABLE_STATE = typename GfxFamily::BINDING_TABLE_STATE;
|
||||
using INTERFACE_DESCRIPTOR_DATA = typename GfxFamily::INTERFACE_DESCRIPTOR_DATA;
|
||||
using SAMPLER_STATE = typename GfxFamily::SAMPLER_STATE;
|
||||
using SAMPLER_BORDER_COLOR_STATE = typename GfxFamily::SAMPLER_BORDER_COLOR_STATE;
|
||||
|
||||
static const uint32_t alignIndirectStatePointer = MemoryConstants::cacheLineSize;
|
||||
static const size_t alignInterfaceDescriptorData = MemoryConstants::cacheLineSize;
|
||||
@@ -92,7 +92,8 @@ struct EncodeStates {
|
||||
uint32_t samplerStateOffset,
|
||||
uint32_t samplerCount,
|
||||
uint32_t borderColorOffset,
|
||||
const void *fnDynamicStateHeap);
|
||||
const void *fnDynamicStateHeap,
|
||||
BindlessHeapsHelper *bindlessHeapHelper);
|
||||
|
||||
static void adjustStateComputeMode(LinearStream &csr, uint32_t numGrfRequired, void *const stateComputeModePtr, bool isMultiOsContextCapable, bool requiresCoherency);
|
||||
|
||||
|
||||
@@ -12,6 +12,8 @@
|
||||
#include "shared/source/execution_environment/execution_environment.h"
|
||||
#include "shared/source/gmm_helper/gmm.h"
|
||||
#include "shared/source/gmm_helper/gmm_helper.h"
|
||||
#include "shared/source/helpers/api_specific_config.h"
|
||||
#include "shared/source/helpers/bindless_heaps_helper.h"
|
||||
#include "shared/source/helpers/hw_helper.h"
|
||||
#include "shared/source/helpers/local_id_gen.h"
|
||||
#include "shared/source/helpers/preamble.h"
|
||||
@@ -30,22 +32,44 @@ uint32_t EncodeStates<Family>::copySamplerState(IndirectHeap *dsh,
|
||||
uint32_t samplerStateOffset,
|
||||
uint32_t samplerCount,
|
||||
uint32_t borderColorOffset,
|
||||
const void *fnDynamicStateHeap) {
|
||||
const void *fnDynamicStateHeap,
|
||||
BindlessHeapsHelper *bindlessHeapHelper) {
|
||||
auto sizeSamplerState = sizeof(SAMPLER_STATE) * samplerCount;
|
||||
auto borderColorSize = samplerStateOffset - borderColorOffset;
|
||||
|
||||
SAMPLER_STATE *dstSamplerState = nullptr;
|
||||
uint32_t samplerStateOffsetInDsh = 0;
|
||||
|
||||
dsh->align(EncodeStates<Family>::alignIndirectStatePointer);
|
||||
auto borderColorOffsetInDsh = static_cast<uint32_t>(dsh->getUsed());
|
||||
uint32_t borderColorOffsetInDsh = 0;
|
||||
if (!ApiSpecificConfig::getBindlessConfiguration()) {
|
||||
borderColorOffsetInDsh = static_cast<uint32_t>(dsh->getUsed());
|
||||
auto borderColor = dsh->getSpace(borderColorSize);
|
||||
|
||||
auto borderColor = dsh->getSpace(borderColorSize);
|
||||
memcpy_s(borderColor, borderColorSize, ptrOffset(fnDynamicStateHeap, borderColorOffset),
|
||||
borderColorSize);
|
||||
|
||||
memcpy_s(borderColor, borderColorSize, ptrOffset(fnDynamicStateHeap, borderColorOffset),
|
||||
borderColorSize);
|
||||
dsh->align(INTERFACE_DESCRIPTOR_DATA::SAMPLERSTATEPOINTER_ALIGN_SIZE);
|
||||
samplerStateOffsetInDsh = static_cast<uint32_t>(dsh->getUsed());
|
||||
|
||||
dsh->align(INTERFACE_DESCRIPTOR_DATA::SAMPLERSTATEPOINTER_ALIGN_SIZE);
|
||||
auto samplerStateOffsetInDsh = static_cast<uint32_t>(dsh->getUsed());
|
||||
|
||||
auto dstSamplerState = reinterpret_cast<SAMPLER_STATE *>(dsh->getSpace(sizeSamplerState));
|
||||
dstSamplerState = reinterpret_cast<SAMPLER_STATE *>(dsh->getSpace(sizeSamplerState));
|
||||
} else {
|
||||
auto borderColor = reinterpret_cast<const SAMPLER_BORDER_COLOR_STATE *>(ptrOffset(fnDynamicStateHeap, borderColorOffset));
|
||||
if (borderColor->getBorderColorRed() != 0.0f ||
|
||||
borderColor->getBorderColorGreen() != 0.0f ||
|
||||
borderColor->getBorderColorBlue() != 0.0f ||
|
||||
(borderColor->getBorderColorAlpha() != 0.0f && borderColor->getBorderColorAlpha() != 1.0f)) {
|
||||
UNRECOVERABLE_IF(true);
|
||||
} else if (borderColor->getBorderColorAlpha() == 0.0f) {
|
||||
borderColorOffsetInDsh = bindlessHeapHelper->getDefaultBorderColorOffset();
|
||||
} else {
|
||||
borderColorOffsetInDsh = bindlessHeapHelper->getAlphaBorderColorOffset();
|
||||
}
|
||||
dsh->align(INTERFACE_DESCRIPTOR_DATA::SAMPLERSTATEPOINTER_ALIGN_SIZE);
|
||||
auto samplerStateInDsh = bindlessHeapHelper->allocateSSInHeap(sizeSamplerState, nullptr, BindlessHeapsHelper::BindlesHeapType::GLOBAL_DSH);
|
||||
dstSamplerState = reinterpret_cast<SAMPLER_STATE *>(samplerStateInDsh.ssPtr);
|
||||
samplerStateOffsetInDsh = static_cast<uint32_t>(samplerStateInDsh.surfaceStateOffset);
|
||||
}
|
||||
|
||||
auto srcSamplerState = reinterpret_cast<const SAMPLER_STATE *>(ptrOffset(fnDynamicStateHeap, samplerStateOffset));
|
||||
SAMPLER_STATE state = {};
|
||||
@@ -56,7 +80,7 @@ uint32_t EncodeStates<Family>::copySamplerState(IndirectHeap *dsh,
|
||||
}
|
||||
|
||||
return samplerStateOffsetInDsh;
|
||||
}
|
||||
} // namespace NEO
|
||||
|
||||
template <typename Family>
|
||||
size_t EncodeStates<Family>::getAdjustStateComputeModeSize() {
|
||||
@@ -382,40 +406,6 @@ void *EncodeDispatchKernel<Family>::getInterfaceDescriptor(CommandContainer &con
|
||||
return &interfaceDescriptorData[container.nextIddInBlock++];
|
||||
}
|
||||
|
||||
// Patches bindless surface-state offsets into a kernel's cross-thread data.
//
// Walks every explicit kernel argument in kernelDesc; for pointer and image
// arguments that carry a valid bindless offset, a value derived from
// (sshOffset + the argument's bindful offset) is patched into crossThread at
// that bindless offset.
//
// sshOffset   - base offset added to each argument's bindful offset; it is
//               narrowed to uint32_t before the addition.
// kernelDesc  - descriptor whose payloadMappings.explicitArgs are inspected.
// crossThread - base pointer of the buffer that receives the patched values.
template <typename Family>
|
||||
void EncodeDispatchKernel<Family>::patchBindlessSurfaceStateOffsets(const size_t sshOffset, const KernelDescriptor &kernelDesc, uint8_t *crossThread) {
|
||||
auto &hwHelper = HwHelperHw<Family>::get();
|
||||
|
||||
for (const auto &argT : kernelDesc.payloadMappings.explicitArgs) {
|
||||
// Both offsets start out as "undefined"; only pointer and image arguments overwrite them below.
CrossThreadDataOffset bindless = undefined<CrossThreadDataOffset>;
|
||||
SurfaceStateHeapOffset bindful = undefined<SurfaceStateHeapOffset>;
|
||||
|
||||
switch (argT.type) {
|
||||
case ArgDescriptor::ArgTPointer: {
|
||||
auto &arg = argT.as<NEO::ArgDescPointer>();
|
||||
bindless = arg.bindless;
|
||||
bindful = arg.bindful;
|
||||
} break;
|
||||
|
||||
case ArgDescriptor::ArgTImage: {
|
||||
auto &arg = argT.as<NEO::ArgDescImage>();
|
||||
bindless = arg.bindless;
|
||||
bindful = arg.bindful;
|
||||
} break;
|
||||
|
||||
// Every other argument type keeps the undefined offsets and is therefore skipped by the check below.
default:
|
||||
break;
|
||||
}
|
||||
|
||||
// Only the bindless offset is validated here.
// NOTE(review): bindful is added below without its own validity check - confirm it is always defined when bindless is.
if (NEO::isValidOffset(bindless)) {
|
||||
auto patchLocation = ptrOffset(crossThread, bindless);
|
||||
auto bindlessOffset = static_cast<uint32_t>(sshOffset) + bindful;
|
||||
// The raw offset is converted by the hardware helper before being stored.
auto patchValue = hwHelper.getBindlessSurfaceExtendedMessageDescriptorValue(bindlessOffset);
|
||||
// Presumably stores sizeof(patchValue) bytes of patchValue at patchLocation - verify against patchWithRequiredSize.
patchWithRequiredSize(patchLocation, sizeof(patchValue), patchValue);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template <typename Family>
|
||||
bool EncodeDispatchKernel<Family>::inlineDataProgrammingRequired(const KernelDescriptor &kernelDesc) {
|
||||
auto checkKernelForInlineData = true;
|
||||
|
||||
@@ -11,6 +11,7 @@
|
||||
#include "shared/source/command_stream/preemption.h"
|
||||
#include "shared/source/execution_environment/execution_environment.h"
|
||||
#include "shared/source/gmm_helper/gmm_helper.h"
|
||||
#include "shared/source/helpers/api_specific_config.h"
|
||||
#include "shared/source/helpers/hw_helper.h"
|
||||
#include "shared/source/helpers/simd_helper.h"
|
||||
#include "shared/source/helpers/state_base_address.h"
|
||||
@@ -79,22 +80,25 @@ void EncodeDispatchKernel<Family>::encode(CommandContainer &container,
|
||||
|
||||
uint32_t bindingTableStateCount = kernelDescriptor.payloadMappings.bindingTable.numEntries;
|
||||
uint32_t bindingTablePointer = 0u;
|
||||
bool isBindlessKernel = kernelDescriptor.kernelAttributes.bufferAddressingMode == KernelDescriptor::BindlessAndStateless;
|
||||
if (!isBindlessKernel) {
|
||||
|
||||
if (bindingTableStateCount > 0u) {
|
||||
auto ssh = container.getHeapWithRequiredSizeAndAlignment(HeapType::SURFACE_STATE, dispatchInterface->getSurfaceStateHeapDataSize(), BINDING_TABLE_STATE::SURFACESTATEPOINTER_ALIGN_SIZE);
|
||||
sshOffset = ssh->getUsed();
|
||||
bindingTablePointer = static_cast<uint32_t>(EncodeSurfaceState<Family>::pushBindingTableAndSurfaceStates(
|
||||
*ssh, bindingTableStateCount,
|
||||
dispatchInterface->getSurfaceStateHeapData(),
|
||||
dispatchInterface->getSurfaceStateHeapDataSize(), bindingTableStateCount,
|
||||
kernelDescriptor.payloadMappings.bindingTable.tableOffset));
|
||||
if (bindingTableStateCount > 0u) {
|
||||
auto ssh = container.getHeapWithRequiredSizeAndAlignment(HeapType::SURFACE_STATE, dispatchInterface->getSurfaceStateHeapDataSize(), BINDING_TABLE_STATE::SURFACESTATEPOINTER_ALIGN_SIZE);
|
||||
sshOffset = ssh->getUsed();
|
||||
bindingTablePointer = static_cast<uint32_t>(EncodeSurfaceState<Family>::pushBindingTableAndSurfaceStates(
|
||||
*ssh, bindingTableStateCount,
|
||||
dispatchInterface->getSurfaceStateHeapData(),
|
||||
dispatchInterface->getSurfaceStateHeapDataSize(), bindingTableStateCount,
|
||||
kernelDescriptor.payloadMappings.bindingTable.tableOffset));
|
||||
}
|
||||
|
||||
idd.setBindingTablePointer(bindingTablePointer);
|
||||
}
|
||||
|
||||
idd.setBindingTablePointer(bindingTablePointer);
|
||||
|
||||
PreemptionHelper::programInterfaceDescriptorDataPreemption<Family>(&idd, preemptionMode);
|
||||
|
||||
auto heap = container.getIndirectHeap(HeapType::DYNAMIC_STATE);
|
||||
auto heap = ApiSpecificConfig::getBindlessConfiguration() ? device->getBindlessHeapsHelper()->getHeap(BindlessHeapsHelper::GLOBAL_DSH) : container.getIndirectHeap(HeapType::DYNAMIC_STATE);
|
||||
UNRECOVERABLE_IF(!heap);
|
||||
|
||||
uint32_t samplerStateOffset = 0;
|
||||
@@ -105,7 +109,11 @@ void EncodeDispatchKernel<Family>::encode(CommandContainer &container,
|
||||
samplerStateOffset = EncodeStates<Family>::copySamplerState(heap, kernelDescriptor.payloadMappings.samplerTable.tableOffset,
|
||||
kernelDescriptor.payloadMappings.samplerTable.numSamplers,
|
||||
kernelDescriptor.payloadMappings.samplerTable.borderColor,
|
||||
dispatchInterface->getDynamicStateHeapData());
|
||||
dispatchInterface->getDynamicStateHeapData(),
|
||||
device->getBindlessHeapsHelper());
|
||||
if (ApiSpecificConfig::getBindlessConfiguration()) {
|
||||
container.getResidencyContainer().push_back(device->getBindlessHeapsHelper()->getHeap(NEO::BindlessHeapsHelper::BindlesHeapType::GLOBAL_DSH)->getGraphicsAllocation());
|
||||
}
|
||||
}
|
||||
|
||||
idd.setSamplerStatePointer(samplerStateOffset);
|
||||
@@ -139,10 +147,6 @@ void EncodeDispatchKernel<Family>::encode(CommandContainer &container,
|
||||
EncodeIndirectParams<Family>::setGlobalWorkSizeIndirect(container, kernelDescriptor.payloadMappings.dispatchTraits.globalWorkSize, gpuPtr, dispatchInterface->getGroupSize());
|
||||
}
|
||||
|
||||
if (kernelDescriptor.payloadMappings.bindingTable.numEntries > 0) {
|
||||
patchBindlessSurfaceStateOffsets(sshOffset, dispatchInterface->getKernelDescriptor(), reinterpret_cast<uint8_t *>(ptr));
|
||||
}
|
||||
|
||||
ptr = ptrOffset(ptr, sizeCrossThreadData);
|
||||
memcpy_s(ptr, sizePerThreadDataForWholeGroup,
|
||||
dispatchInterface->getPerThreadData(), sizePerThreadDataForWholeGroup);
|
||||
|
||||
Reference in New Issue
Block a user