mirror of
https://github.com/intel/compute-runtime.git
synced 2026-01-11 16:45:25 +08:00
Refactor creation of buffer surface state 2/n
Signed-off-by: Zbigniew Zdanowicz <zbigniew.zdanowicz@intel.com>
This commit is contained in:
committed by
Compute-Runtime-Automation
parent
275ec1118e
commit
a7747fece5
@@ -10,8 +10,6 @@
|
||||
#include "shared/source/command_stream/tbx_command_stream_receiver_hw.h"
|
||||
#include "shared/source/helpers/populate_factory.h"
|
||||
|
||||
#include "opencl/source/mem_obj/buffer.h"
|
||||
|
||||
#include "level_zero/core/source/helpers/l0_populate_factory.h"
|
||||
#include "level_zero/core/source/hw_helpers/l0_hw_helper.h"
|
||||
|
||||
@@ -24,7 +22,6 @@ struct EnableL0Gen11 {
|
||||
populateFactoryTable<AUBCommandStreamReceiverHw<Family>>();
|
||||
populateFactoryTable<TbxCommandStreamReceiverHw<Family>>();
|
||||
populateFactoryTable<CommandStreamReceiverHw<Family>>();
|
||||
populateFactoryTable<BufferHw<Family>>();
|
||||
L0::populateFactoryTable<L0::L0HwHelperHw<Family>>();
|
||||
}
|
||||
};
|
||||
|
||||
@@ -10,8 +10,6 @@
|
||||
#include "shared/source/command_stream/tbx_command_stream_receiver_hw.h"
|
||||
#include "shared/source/helpers/populate_factory.h"
|
||||
|
||||
#include "opencl/source/mem_obj/buffer.h"
|
||||
|
||||
#include "level_zero/core/source/helpers/l0_populate_factory.h"
|
||||
#include "level_zero/core/source/hw_helpers/l0_hw_helper.h"
|
||||
namespace NEO {
|
||||
@@ -23,7 +21,6 @@ struct EnableL0Gen12LP {
|
||||
populateFactoryTable<AUBCommandStreamReceiverHw<Family>>();
|
||||
populateFactoryTable<TbxCommandStreamReceiverHw<Family>>();
|
||||
populateFactoryTable<CommandStreamReceiverHw<Family>>();
|
||||
populateFactoryTable<BufferHw<Family>>();
|
||||
L0::populateFactoryTable<L0::L0HwHelperHw<Family>>();
|
||||
}
|
||||
};
|
||||
|
||||
@@ -10,8 +10,6 @@
|
||||
#include "shared/source/command_stream/tbx_command_stream_receiver_hw.h"
|
||||
#include "shared/source/helpers/populate_factory.h"
|
||||
|
||||
#include "opencl/source/mem_obj/buffer.h"
|
||||
|
||||
namespace NEO {
|
||||
|
||||
typedef BDWFamily Family;
|
||||
@@ -21,7 +19,6 @@ struct EnableL0Gen8 {
|
||||
populateFactoryTable<AUBCommandStreamReceiverHw<Family>>();
|
||||
populateFactoryTable<TbxCommandStreamReceiverHw<Family>>();
|
||||
populateFactoryTable<CommandStreamReceiverHw<Family>>();
|
||||
populateFactoryTable<BufferHw<Family>>();
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
@@ -10,8 +10,6 @@
|
||||
#include "shared/source/command_stream/tbx_command_stream_receiver_hw.h"
|
||||
#include "shared/source/helpers/populate_factory.h"
|
||||
|
||||
#include "opencl/source/mem_obj/buffer.h"
|
||||
|
||||
#include "level_zero/core/source/helpers/l0_populate_factory.h"
|
||||
#include "level_zero/core/source/hw_helpers/l0_hw_helper.h"
|
||||
|
||||
@@ -24,7 +22,6 @@ struct EnableL0Gen9 {
|
||||
populateFactoryTable<AUBCommandStreamReceiverHw<Family>>();
|
||||
populateFactoryTable<TbxCommandStreamReceiverHw<Family>>();
|
||||
populateFactoryTable<CommandStreamReceiverHw<Family>>();
|
||||
populateFactoryTable<BufferHw<Family>>();
|
||||
L0::populateFactoryTable<L0::L0HwHelperHw<Family>>();
|
||||
}
|
||||
};
|
||||
|
||||
@@ -25,8 +25,6 @@
|
||||
#include "shared/source/program/kernel_info.h"
|
||||
#include "shared/source/utilities/arrayref.h"
|
||||
|
||||
#include "opencl/source/mem_obj/buffer.h"
|
||||
|
||||
#include "level_zero/core/source/debugger/debugger_l0.h"
|
||||
#include "level_zero/core/source/device/device.h"
|
||||
#include "level_zero/core/source/device/device_imp.h"
|
||||
@@ -85,17 +83,33 @@ KernelImmutableData::~KernelImmutableData() {
|
||||
|
||||
inline void patchWithImplicitSurface(ArrayRef<uint8_t> crossThreadData, ArrayRef<uint8_t> surfaceStateHeap,
|
||||
uintptr_t ptrToPatchInCrossThreadData, NEO::GraphicsAllocation &allocation,
|
||||
const NEO::ArgDescPointer &ptr, const NEO::Device &device, bool useGlobalAtomics) {
|
||||
const NEO::ArgDescPointer &ptr, Device &device, bool useGlobalAtomics) {
|
||||
if (false == crossThreadData.empty()) {
|
||||
NEO::patchPointer(crossThreadData, ptr, ptrToPatchInCrossThreadData);
|
||||
}
|
||||
|
||||
if ((false == surfaceStateHeap.empty()) && (NEO::isValidOffset(ptr.bindful))) {
|
||||
auto surfaceState = surfaceStateHeap.begin() + ptr.bindful;
|
||||
void *addressToPatch = reinterpret_cast<void *>(allocation.getUnderlyingBuffer());
|
||||
auto addressToPatch = allocation.getGpuAddress();
|
||||
size_t sizeToPatch = allocation.getUnderlyingBufferSize();
|
||||
NEO::Buffer::setSurfaceState(&device, surfaceState, false, false, sizeToPatch, addressToPatch, 0,
|
||||
&allocation, 0, 0, useGlobalAtomics, device.getNumGenericSubDevices() > 1);
|
||||
|
||||
auto neoDevice = device.getNEODevice();
|
||||
|
||||
auto &hwInfo = neoDevice->getHardwareInfo();
|
||||
auto &hwHelper = NEO::HwHelper::get(hwInfo.platform.eRenderCoreFamily);
|
||||
|
||||
NEO::EncodeSurfaceStateArgs args;
|
||||
args.outMemory = surfaceState;
|
||||
args.size = sizeToPatch;
|
||||
args.graphicsAddress = addressToPatch;
|
||||
args.gmmHelper = neoDevice->getGmmHelper();
|
||||
args.allocation = &allocation;
|
||||
args.useGlobalAtomics = useGlobalAtomics;
|
||||
args.numAvailableDevices = neoDevice->getNumGenericSubDevices();
|
||||
args.areMultipleSubDevicesInContext = args.numAvailableDevices > 1;
|
||||
args.mocs = device.getMOCS(true, false);
|
||||
|
||||
hwHelper.encodeBufferSurfaceState(args);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -187,7 +201,7 @@ void KernelImmutableData::initialize(NEO::KernelInfo *kernelInfo, Device *device
|
||||
patchWithImplicitSurface(crossThredDataArrayRef, surfaceStateHeapArrayRef,
|
||||
static_cast<uintptr_t>(globalConstBuffer->getGpuAddressToPatch()),
|
||||
*globalConstBuffer, kernelDescriptor->payloadMappings.implicitArgs.globalConstantsSurfaceAddress,
|
||||
*neoDevice, kernelDescriptor->kernelAttributes.flags.useGlobalAtomics);
|
||||
*device, kernelDescriptor->kernelAttributes.flags.useGlobalAtomics);
|
||||
this->residencyContainer.push_back(globalConstBuffer);
|
||||
} else if (nullptr != globalConstBuffer) {
|
||||
this->residencyContainer.push_back(globalConstBuffer);
|
||||
@@ -199,7 +213,7 @@ void KernelImmutableData::initialize(NEO::KernelInfo *kernelInfo, Device *device
|
||||
patchWithImplicitSurface(crossThredDataArrayRef, surfaceStateHeapArrayRef,
|
||||
static_cast<uintptr_t>(globalVarBuffer->getGpuAddressToPatch()),
|
||||
*globalVarBuffer, kernelDescriptor->payloadMappings.implicitArgs.globalVariablesSurfaceAddress,
|
||||
*neoDevice, kernelDescriptor->kernelAttributes.flags.useGlobalAtomics);
|
||||
*device, kernelDescriptor->kernelAttributes.flags.useGlobalAtomics);
|
||||
this->residencyContainer.push_back(globalVarBuffer);
|
||||
} else if (nullptr != globalVarBuffer) {
|
||||
this->residencyContainer.push_back(globalVarBuffer);
|
||||
@@ -757,7 +771,7 @@ NEO::GraphicsAllocation *KernelImp::allocatePrivateMemoryGraphicsAllocation() {
|
||||
|
||||
void KernelImp::patchCrossthreadDataWithPrivateAllocation(NEO::GraphicsAllocation *privateAllocation) {
|
||||
auto &kernelAttributes = kernelImmData->getDescriptor().kernelAttributes;
|
||||
auto neoDevice = module->getDevice()->getNEODevice();
|
||||
auto device = module->getDevice();
|
||||
|
||||
ArrayRef<uint8_t> crossThredDataArrayRef = ArrayRef<uint8_t>(this->crossThreadData.get(), this->crossThreadDataSize);
|
||||
ArrayRef<uint8_t> surfaceStateHeapArrayRef = ArrayRef<uint8_t>(this->surfaceStateHeapData.get(), this->surfaceStateHeapDataSize);
|
||||
@@ -765,7 +779,7 @@ void KernelImp::patchCrossthreadDataWithPrivateAllocation(NEO::GraphicsAllocatio
|
||||
patchWithImplicitSurface(crossThredDataArrayRef, surfaceStateHeapArrayRef,
|
||||
static_cast<uintptr_t>(privateAllocation->getGpuAddressToPatch()),
|
||||
*privateAllocation, kernelImmData->getDescriptor().payloadMappings.implicitArgs.privateMemoryAddress,
|
||||
*neoDevice, kernelAttributes.flags.useGlobalAtomics);
|
||||
*device, kernelAttributes.flags.useGlobalAtomics);
|
||||
}
|
||||
|
||||
ze_result_t KernelImp::initialize(const ze_kernel_desc_t *desc) {
|
||||
@@ -933,7 +947,7 @@ void KernelImp::setDebugSurface() {
|
||||
patchWithImplicitSurface(ArrayRef<uint8_t>(), surfaceStateHeapRef,
|
||||
0,
|
||||
*device->getDebugSurface(), this->getImmutableData()->getDescriptor().payloadMappings.implicitArgs.systemThreadSurfaceAddress,
|
||||
*device->getNEODevice(), getKernelDescriptor().kernelAttributes.flags.useGlobalAtomics);
|
||||
*device, getKernelDescriptor().kernelAttributes.flags.useGlobalAtomics);
|
||||
}
|
||||
}
|
||||
void *KernelImp::patchBindlessSurfaceState(NEO::GraphicsAllocation *alloc, uint32_t bindless) {
|
||||
|
||||
@@ -10,8 +10,6 @@
|
||||
#include "shared/source/command_stream/tbx_command_stream_receiver_hw.h"
|
||||
#include "shared/source/helpers/populate_factory.h"
|
||||
|
||||
#include "opencl/source/mem_obj/buffer.h"
|
||||
|
||||
#include "level_zero/core/source/helpers/l0_populate_factory.h"
|
||||
#include "level_zero/core/source/hw_helpers/l0_hw_helper.h"
|
||||
|
||||
@@ -24,7 +22,6 @@ struct EnableL0XeHpCore {
|
||||
populateFactoryTable<AUBCommandStreamReceiverHw<Family>>();
|
||||
populateFactoryTable<TbxCommandStreamReceiverHw<Family>>();
|
||||
populateFactoryTable<CommandStreamReceiverHw<Family>>();
|
||||
populateFactoryTable<BufferHw<Family>>();
|
||||
L0::populateFactoryTable<L0::L0HwHelperHw<Family>>();
|
||||
}
|
||||
};
|
||||
|
||||
Reference in New Issue
Block a user