diff --git a/level_zero/core/source/gen11/enable_family_full_l0_gen11.cpp b/level_zero/core/source/gen11/enable_family_full_l0_gen11.cpp index 67b2087ad2..4b54db04f6 100644 --- a/level_zero/core/source/gen11/enable_family_full_l0_gen11.cpp +++ b/level_zero/core/source/gen11/enable_family_full_l0_gen11.cpp @@ -10,8 +10,6 @@ #include "shared/source/command_stream/tbx_command_stream_receiver_hw.h" #include "shared/source/helpers/populate_factory.h" -#include "opencl/source/mem_obj/buffer.h" - #include "level_zero/core/source/helpers/l0_populate_factory.h" #include "level_zero/core/source/hw_helpers/l0_hw_helper.h" @@ -24,7 +22,6 @@ struct EnableL0Gen11 { populateFactoryTable>(); populateFactoryTable>(); populateFactoryTable>(); - populateFactoryTable>(); L0::populateFactoryTable>(); } }; diff --git a/level_zero/core/source/gen12lp/enable_family_full_l0_gen12lp.cpp b/level_zero/core/source/gen12lp/enable_family_full_l0_gen12lp.cpp index d6881b4bf9..839d6ff3f5 100644 --- a/level_zero/core/source/gen12lp/enable_family_full_l0_gen12lp.cpp +++ b/level_zero/core/source/gen12lp/enable_family_full_l0_gen12lp.cpp @@ -10,8 +10,6 @@ #include "shared/source/command_stream/tbx_command_stream_receiver_hw.h" #include "shared/source/helpers/populate_factory.h" -#include "opencl/source/mem_obj/buffer.h" - #include "level_zero/core/source/helpers/l0_populate_factory.h" #include "level_zero/core/source/hw_helpers/l0_hw_helper.h" namespace NEO { @@ -23,7 +21,6 @@ struct EnableL0Gen12LP { populateFactoryTable>(); populateFactoryTable>(); populateFactoryTable>(); - populateFactoryTable>(); L0::populateFactoryTable>(); } }; diff --git a/level_zero/core/source/gen8/enable_family_full_l0_gen8.cpp b/level_zero/core/source/gen8/enable_family_full_l0_gen8.cpp index 13a7f26380..4f5bef0094 100644 --- a/level_zero/core/source/gen8/enable_family_full_l0_gen8.cpp +++ b/level_zero/core/source/gen8/enable_family_full_l0_gen8.cpp @@ -10,8 +10,6 @@ #include "shared/source/command_stream/tbx_command_stream_receiver_hw.h" #include "shared/source/helpers/populate_factory.h" -#include "opencl/source/mem_obj/buffer.h" - namespace NEO { typedef BDWFamily Family; @@ -21,7 +19,6 @@ struct EnableL0Gen8 { populateFactoryTable>(); populateFactoryTable>(); populateFactoryTable>(); - populateFactoryTable>(); } }; diff --git a/level_zero/core/source/gen9/enable_family_full_l0_gen9.cpp b/level_zero/core/source/gen9/enable_family_full_l0_gen9.cpp index dbf4a0b2c0..8a6ac121d1 100644 --- a/level_zero/core/source/gen9/enable_family_full_l0_gen9.cpp +++ b/level_zero/core/source/gen9/enable_family_full_l0_gen9.cpp @@ -10,8 +10,6 @@ #include "shared/source/command_stream/tbx_command_stream_receiver_hw.h" #include "shared/source/helpers/populate_factory.h" -#include "opencl/source/mem_obj/buffer.h" - #include "level_zero/core/source/helpers/l0_populate_factory.h" #include "level_zero/core/source/hw_helpers/l0_hw_helper.h" @@ -24,7 +22,6 @@ struct EnableL0Gen9 { populateFactoryTable>(); populateFactoryTable>(); populateFactoryTable>(); - populateFactoryTable>(); L0::populateFactoryTable>(); } }; diff --git a/level_zero/core/source/kernel/kernel_imp.cpp b/level_zero/core/source/kernel/kernel_imp.cpp index 857a2649a1..4ecfa23367 100644 --- a/level_zero/core/source/kernel/kernel_imp.cpp +++ b/level_zero/core/source/kernel/kernel_imp.cpp @@ -25,8 +25,6 @@ #include "shared/source/program/kernel_info.h" #include "shared/source/utilities/arrayref.h" -#include "opencl/source/mem_obj/buffer.h" - #include "level_zero/core/source/debugger/debugger_l0.h" #include "level_zero/core/source/device/device.h" #include "level_zero/core/source/device/device_imp.h" @@ -85,17 +83,33 @@ KernelImmutableData::~KernelImmutableData() { inline void patchWithImplicitSurface(ArrayRef crossThreadData, ArrayRef surfaceStateHeap, uintptr_t ptrToPatchInCrossThreadData, NEO::GraphicsAllocation &allocation, - const NEO::ArgDescPointer &ptr, const NEO::Device &device, bool useGlobalAtomics) { + const NEO::ArgDescPointer &ptr, Device &device, bool useGlobalAtomics) { if (false == crossThreadData.empty()) { NEO::patchPointer(crossThreadData, ptr, ptrToPatchInCrossThreadData); } if ((false == surfaceStateHeap.empty()) && (NEO::isValidOffset(ptr.bindful))) { auto surfaceState = surfaceStateHeap.begin() + ptr.bindful; - void *addressToPatch = reinterpret_cast(allocation.getUnderlyingBuffer()); + auto addressToPatch = allocation.getGpuAddress(); size_t sizeToPatch = allocation.getUnderlyingBufferSize(); - NEO::Buffer::setSurfaceState(&device, surfaceState, false, false, sizeToPatch, addressToPatch, 0, - &allocation, 0, 0, useGlobalAtomics, device.getNumGenericSubDevices() > 1); + + auto neoDevice = device.getNEODevice(); + + auto &hwInfo = neoDevice->getHardwareInfo(); + auto &hwHelper = NEO::HwHelper::get(hwInfo.platform.eRenderCoreFamily); + + NEO::EncodeSurfaceStateArgs args; + args.outMemory = surfaceState; + args.size = sizeToPatch; + args.graphicsAddress = addressToPatch; + args.gmmHelper = neoDevice->getGmmHelper(); + args.allocation = &allocation; + args.useGlobalAtomics = useGlobalAtomics; + args.numAvailableDevices = neoDevice->getNumGenericSubDevices(); + args.areMultipleSubDevicesInContext = args.numAvailableDevices > 1; + args.mocs = device.getMOCS(true, false); + + hwHelper.encodeBufferSurfaceState(args); } } @@ -187,7 +201,7 @@ void KernelImmutableData::initialize(NEO::KernelInfo *kernelInfo, Device *device patchWithImplicitSurface(crossThredDataArrayRef, surfaceStateHeapArrayRef, static_cast(globalConstBuffer->getGpuAddressToPatch()), *globalConstBuffer, kernelDescriptor->payloadMappings.implicitArgs.globalConstantsSurfaceAddress, - *neoDevice, kernelDescriptor->kernelAttributes.flags.useGlobalAtomics); + *device, kernelDescriptor->kernelAttributes.flags.useGlobalAtomics); this->residencyContainer.push_back(globalConstBuffer); } else if (nullptr != globalConstBuffer) { this->residencyContainer.push_back(globalConstBuffer); @@ -199,7 +213,7 @@ void KernelImmutableData::initialize(NEO::KernelInfo *kernelInfo, Device *device patchWithImplicitSurface(crossThredDataArrayRef, surfaceStateHeapArrayRef, static_cast(globalVarBuffer->getGpuAddressToPatch()), *globalVarBuffer, kernelDescriptor->payloadMappings.implicitArgs.globalVariablesSurfaceAddress, - *neoDevice, kernelDescriptor->kernelAttributes.flags.useGlobalAtomics); + *device, kernelDescriptor->kernelAttributes.flags.useGlobalAtomics); this->residencyContainer.push_back(globalVarBuffer); } else if (nullptr != globalVarBuffer) { this->residencyContainer.push_back(globalVarBuffer); @@ -757,7 +771,7 @@ NEO::GraphicsAllocation *KernelImp::allocatePrivateMemoryGraphicsAllocation() { void KernelImp::patchCrossthreadDataWithPrivateAllocation(NEO::GraphicsAllocation *privateAllocation) { auto &kernelAttributes = kernelImmData->getDescriptor().kernelAttributes; - auto neoDevice = module->getDevice()->getNEODevice(); + auto device = module->getDevice(); ArrayRef crossThredDataArrayRef = ArrayRef(this->crossThreadData.get(), this->crossThreadDataSize); ArrayRef surfaceStateHeapArrayRef = ArrayRef(this->surfaceStateHeapData.get(), this->surfaceStateHeapDataSize); @@ -765,7 +779,7 @@ void KernelImp::patchCrossthreadDataWithPrivateAllocation(NEO::GraphicsAllocatio patchWithImplicitSurface(crossThredDataArrayRef, surfaceStateHeapArrayRef, static_cast(privateAllocation->getGpuAddressToPatch()), *privateAllocation, kernelImmData->getDescriptor().payloadMappings.implicitArgs.privateMemoryAddress, - *neoDevice, kernelAttributes.flags.useGlobalAtomics); + *device, kernelAttributes.flags.useGlobalAtomics); } ze_result_t KernelImp::initialize(const ze_kernel_desc_t *desc) { @@ -933,7 +947,7 @@ void KernelImp::setDebugSurface() { patchWithImplicitSurface(ArrayRef(), surfaceStateHeapRef, 0, *device->getDebugSurface(), this->getImmutableData()->getDescriptor().payloadMappings.implicitArgs.systemThreadSurfaceAddress, - *device->getNEODevice(), getKernelDescriptor().kernelAttributes.flags.useGlobalAtomics); + *device, getKernelDescriptor().kernelAttributes.flags.useGlobalAtomics); } } void *KernelImp::patchBindlessSurfaceState(NEO::GraphicsAllocation *alloc, uint32_t bindless) { diff --git a/level_zero/core/source/xe_hp_core/enable_family_full_l0_xe_hp_core.cpp b/level_zero/core/source/xe_hp_core/enable_family_full_l0_xe_hp_core.cpp index a66dc6c7da..7a94be2e42 100644 --- a/level_zero/core/source/xe_hp_core/enable_family_full_l0_xe_hp_core.cpp +++ b/level_zero/core/source/xe_hp_core/enable_family_full_l0_xe_hp_core.cpp @@ -10,8 +10,6 @@ #include "shared/source/command_stream/tbx_command_stream_receiver_hw.h" #include "shared/source/helpers/populate_factory.h" -#include "opencl/source/mem_obj/buffer.h" - #include "level_zero/core/source/helpers/l0_populate_factory.h" #include "level_zero/core/source/hw_helpers/l0_hw_helper.h" @@ -24,7 +22,6 @@ struct EnableL0XeHpCore { populateFactoryTable>(); populateFactoryTable>(); populateFactoryTable>(); - populateFactoryTable>(); L0::populateFactoryTable>(); } }; diff --git a/shared/source/helpers/hw_helper.h b/shared/source/helpers/hw_helper.h index 2f6fdf6083..954d62a37b 100644 --- a/shared/source/helpers/hw_helper.h +++ b/shared/source/helpers/hw_helper.h @@ -31,6 +31,7 @@ class TagAllocatorBase; class Gmm; struct AllocationData; struct AllocationProperties; +struct EncodeSurfaceStateArgs; struct EngineControl; struct RootDeviceEnvironment; struct PipeControlArgs; @@ -147,6 +148,7 @@ class HwHelper { virtual size_t getMax3dImageWidthOrHeight() const = 0; virtual uint64_t getMaxMemAllocSize() const = 0; virtual bool isStatelesToStatefullWithOffsetSupported() const = 0; + virtual void encodeBufferSurfaceState(EncodeSurfaceStateArgs &args) = 0; protected: HwHelper() = default; @@ -369,6 +371,7 @@ class HwHelperHw : public HwHelper { size_t getMax3dImageWidthOrHeight() const override; uint64_t getMaxMemAllocSize() const override; bool isStatelesToStatefullWithOffsetSupported() const override; + void encodeBufferSurfaceState(EncodeSurfaceStateArgs &args) override; protected: static const AuxTranslationMode defaultAuxTranslationMode; diff --git a/shared/source/helpers/hw_helper_base.inl b/shared/source/helpers/hw_helper_base.inl index 93ec8876d4..39f105ac96 100644 --- a/shared/source/helpers/hw_helper_base.inl +++ b/shared/source/helpers/hw_helper_base.inl @@ -5,6 +5,7 @@ * */ +#include "shared/source/command_container/command_encoder.h" #include "shared/source/execution_environment/root_device_environment.h" #include "shared/source/gmm_helper/gmm.h" #include "shared/source/gmm_helper/gmm_helper.h" @@ -627,4 +628,10 @@ template uint64_t HwHelperHw::getRenderSurfaceStateBaseAddress(void *renderSurfaceState) const { return reinterpret_cast(renderSurfaceState)->getSurfaceBaseAddress(); } + +template +void HwHelperHw::encodeBufferSurfaceState(EncodeSurfaceStateArgs &args) { + EncodeSurfaceState::encodeBuffer(args); +} + } // namespace NEO