Refactor creation of buffer surface state 2/n

Signed-off-by: Zbigniew Zdanowicz <zbigniew.zdanowicz@intel.com>
This commit is contained in:
Zbigniew Zdanowicz
2021-10-21 14:30:54 +00:00
committed by Compute-Runtime-Automation
parent 275ec1118e
commit a7747fece5
8 changed files with 35 additions and 26 deletions

View File

@@ -10,8 +10,6 @@
#include "shared/source/command_stream/tbx_command_stream_receiver_hw.h"
#include "shared/source/helpers/populate_factory.h"
#include "opencl/source/mem_obj/buffer.h"
#include "level_zero/core/source/helpers/l0_populate_factory.h"
#include "level_zero/core/source/hw_helpers/l0_hw_helper.h"
@@ -24,7 +22,6 @@ struct EnableL0Gen11 {
populateFactoryTable<AUBCommandStreamReceiverHw<Family>>();
populateFactoryTable<TbxCommandStreamReceiverHw<Family>>();
populateFactoryTable<CommandStreamReceiverHw<Family>>();
populateFactoryTable<BufferHw<Family>>();
L0::populateFactoryTable<L0::L0HwHelperHw<Family>>();
}
};

View File

@@ -10,8 +10,6 @@
#include "shared/source/command_stream/tbx_command_stream_receiver_hw.h"
#include "shared/source/helpers/populate_factory.h"
#include "opencl/source/mem_obj/buffer.h"
#include "level_zero/core/source/helpers/l0_populate_factory.h"
#include "level_zero/core/source/hw_helpers/l0_hw_helper.h"
namespace NEO {
@@ -23,7 +21,6 @@ struct EnableL0Gen12LP {
populateFactoryTable<AUBCommandStreamReceiverHw<Family>>();
populateFactoryTable<TbxCommandStreamReceiverHw<Family>>();
populateFactoryTable<CommandStreamReceiverHw<Family>>();
populateFactoryTable<BufferHw<Family>>();
L0::populateFactoryTable<L0::L0HwHelperHw<Family>>();
}
};

View File

@@ -10,8 +10,6 @@
#include "shared/source/command_stream/tbx_command_stream_receiver_hw.h"
#include "shared/source/helpers/populate_factory.h"
#include "opencl/source/mem_obj/buffer.h"
namespace NEO {
typedef BDWFamily Family;
@@ -21,7 +19,6 @@ struct EnableL0Gen8 {
populateFactoryTable<AUBCommandStreamReceiverHw<Family>>();
populateFactoryTable<TbxCommandStreamReceiverHw<Family>>();
populateFactoryTable<CommandStreamReceiverHw<Family>>();
populateFactoryTable<BufferHw<Family>>();
}
};

View File

@@ -10,8 +10,6 @@
#include "shared/source/command_stream/tbx_command_stream_receiver_hw.h"
#include "shared/source/helpers/populate_factory.h"
#include "opencl/source/mem_obj/buffer.h"
#include "level_zero/core/source/helpers/l0_populate_factory.h"
#include "level_zero/core/source/hw_helpers/l0_hw_helper.h"
@@ -24,7 +22,6 @@ struct EnableL0Gen9 {
populateFactoryTable<AUBCommandStreamReceiverHw<Family>>();
populateFactoryTable<TbxCommandStreamReceiverHw<Family>>();
populateFactoryTable<CommandStreamReceiverHw<Family>>();
populateFactoryTable<BufferHw<Family>>();
L0::populateFactoryTable<L0::L0HwHelperHw<Family>>();
}
};

View File

@@ -25,8 +25,6 @@
#include "shared/source/program/kernel_info.h"
#include "shared/source/utilities/arrayref.h"
#include "opencl/source/mem_obj/buffer.h"
#include "level_zero/core/source/debugger/debugger_l0.h"
#include "level_zero/core/source/device/device.h"
#include "level_zero/core/source/device/device_imp.h"
@@ -85,17 +83,33 @@ KernelImmutableData::~KernelImmutableData() {
inline void patchWithImplicitSurface(ArrayRef<uint8_t> crossThreadData, ArrayRef<uint8_t> surfaceStateHeap,
uintptr_t ptrToPatchInCrossThreadData, NEO::GraphicsAllocation &allocation,
const NEO::ArgDescPointer &ptr, const NEO::Device &device, bool useGlobalAtomics) {
const NEO::ArgDescPointer &ptr, Device &device, bool useGlobalAtomics) {
if (false == crossThreadData.empty()) {
NEO::patchPointer(crossThreadData, ptr, ptrToPatchInCrossThreadData);
}
if ((false == surfaceStateHeap.empty()) && (NEO::isValidOffset(ptr.bindful))) {
auto surfaceState = surfaceStateHeap.begin() + ptr.bindful;
void *addressToPatch = reinterpret_cast<void *>(allocation.getUnderlyingBuffer());
auto addressToPatch = allocation.getGpuAddress();
size_t sizeToPatch = allocation.getUnderlyingBufferSize();
NEO::Buffer::setSurfaceState(&device, surfaceState, false, false, sizeToPatch, addressToPatch, 0,
&allocation, 0, 0, useGlobalAtomics, device.getNumGenericSubDevices() > 1);
auto neoDevice = device.getNEODevice();
auto &hwInfo = neoDevice->getHardwareInfo();
auto &hwHelper = NEO::HwHelper::get(hwInfo.platform.eRenderCoreFamily);
NEO::EncodeSurfaceStateArgs args;
args.outMemory = surfaceState;
args.size = sizeToPatch;
args.graphicsAddress = addressToPatch;
args.gmmHelper = neoDevice->getGmmHelper();
args.allocation = &allocation;
args.useGlobalAtomics = useGlobalAtomics;
args.numAvailableDevices = neoDevice->getNumGenericSubDevices();
args.areMultipleSubDevicesInContext = args.numAvailableDevices > 1;
args.mocs = device.getMOCS(true, false);
hwHelper.encodeBufferSurfaceState(args);
}
}
@@ -187,7 +201,7 @@ void KernelImmutableData::initialize(NEO::KernelInfo *kernelInfo, Device *device
patchWithImplicitSurface(crossThredDataArrayRef, surfaceStateHeapArrayRef,
static_cast<uintptr_t>(globalConstBuffer->getGpuAddressToPatch()),
*globalConstBuffer, kernelDescriptor->payloadMappings.implicitArgs.globalConstantsSurfaceAddress,
*neoDevice, kernelDescriptor->kernelAttributes.flags.useGlobalAtomics);
*device, kernelDescriptor->kernelAttributes.flags.useGlobalAtomics);
this->residencyContainer.push_back(globalConstBuffer);
} else if (nullptr != globalConstBuffer) {
this->residencyContainer.push_back(globalConstBuffer);
@@ -199,7 +213,7 @@ void KernelImmutableData::initialize(NEO::KernelInfo *kernelInfo, Device *device
patchWithImplicitSurface(crossThredDataArrayRef, surfaceStateHeapArrayRef,
static_cast<uintptr_t>(globalVarBuffer->getGpuAddressToPatch()),
*globalVarBuffer, kernelDescriptor->payloadMappings.implicitArgs.globalVariablesSurfaceAddress,
*neoDevice, kernelDescriptor->kernelAttributes.flags.useGlobalAtomics);
*device, kernelDescriptor->kernelAttributes.flags.useGlobalAtomics);
this->residencyContainer.push_back(globalVarBuffer);
} else if (nullptr != globalVarBuffer) {
this->residencyContainer.push_back(globalVarBuffer);
@@ -757,7 +771,7 @@ NEO::GraphicsAllocation *KernelImp::allocatePrivateMemoryGraphicsAllocation() {
void KernelImp::patchCrossthreadDataWithPrivateAllocation(NEO::GraphicsAllocation *privateAllocation) {
auto &kernelAttributes = kernelImmData->getDescriptor().kernelAttributes;
auto neoDevice = module->getDevice()->getNEODevice();
auto device = module->getDevice();
ArrayRef<uint8_t> crossThredDataArrayRef = ArrayRef<uint8_t>(this->crossThreadData.get(), this->crossThreadDataSize);
ArrayRef<uint8_t> surfaceStateHeapArrayRef = ArrayRef<uint8_t>(this->surfaceStateHeapData.get(), this->surfaceStateHeapDataSize);
@@ -765,7 +779,7 @@ void KernelImp::patchCrossthreadDataWithPrivateAllocation(NEO::GraphicsAllocatio
patchWithImplicitSurface(crossThredDataArrayRef, surfaceStateHeapArrayRef,
static_cast<uintptr_t>(privateAllocation->getGpuAddressToPatch()),
*privateAllocation, kernelImmData->getDescriptor().payloadMappings.implicitArgs.privateMemoryAddress,
*neoDevice, kernelAttributes.flags.useGlobalAtomics);
*device, kernelAttributes.flags.useGlobalAtomics);
}
ze_result_t KernelImp::initialize(const ze_kernel_desc_t *desc) {
@@ -933,7 +947,7 @@ void KernelImp::setDebugSurface() {
patchWithImplicitSurface(ArrayRef<uint8_t>(), surfaceStateHeapRef,
0,
*device->getDebugSurface(), this->getImmutableData()->getDescriptor().payloadMappings.implicitArgs.systemThreadSurfaceAddress,
*device->getNEODevice(), getKernelDescriptor().kernelAttributes.flags.useGlobalAtomics);
*device, getKernelDescriptor().kernelAttributes.flags.useGlobalAtomics);
}
}
void *KernelImp::patchBindlessSurfaceState(NEO::GraphicsAllocation *alloc, uint32_t bindless) {

View File

@@ -10,8 +10,6 @@
#include "shared/source/command_stream/tbx_command_stream_receiver_hw.h"
#include "shared/source/helpers/populate_factory.h"
#include "opencl/source/mem_obj/buffer.h"
#include "level_zero/core/source/helpers/l0_populate_factory.h"
#include "level_zero/core/source/hw_helpers/l0_hw_helper.h"
@@ -24,7 +22,6 @@ struct EnableL0XeHpCore {
populateFactoryTable<AUBCommandStreamReceiverHw<Family>>();
populateFactoryTable<TbxCommandStreamReceiverHw<Family>>();
populateFactoryTable<CommandStreamReceiverHw<Family>>();
populateFactoryTable<BufferHw<Family>>();
L0::populateFactoryTable<L0::L0HwHelperHw<Family>>();
}
};