refactor: add kernel properties and move implementations to imp class

Related-To: NEO-15374

Signed-off-by: Zbigniew Zdanowicz <zbigniew.zdanowicz@intel.com>
This commit is contained in:
Zbigniew Zdanowicz
2025-07-15 13:12:06 +00:00
committed by Compute-Runtime-Automation
parent 377b99e741
commit ef5efeac0c
9 changed files with 98 additions and 38 deletions

View File

@@ -32,10 +32,9 @@ struct KernelHw : public KernelImp {
void setBufferSurfaceState(uint32_t argIndex, void *address, NEO::GraphicsAllocation *alloc) override {
uint64_t baseAddress = alloc->getGpuAddressToPatch();
auto sshAlignmentMask = NEO::EncodeSurfaceState<GfxFamily>::getSurfaceBaseAddressAlignmentMask();
// Remove misaligned bytes, accounted for in bufferOffset patch token
baseAddress &= sshAlignmentMask;
baseAddress &= this->surfaceStateAlignmentMask;
auto misalignedSize = ptrDiff(alloc->getGpuAddressToPatch(), baseAddress);
auto offset = ptrDiff(address, reinterpret_cast<void *>(baseAddress));
size_t bufferSizeForSsh = alloc->getUnderlyingBufferSize();
@@ -52,7 +51,7 @@ struct KernelHw : public KernelImp {
offsetedAddress = baseAddress != reinterpret_cast<uintptr_t>(address);
baseAddress = reinterpret_cast<uintptr_t>(address);
bufferSizeForSsh -= offset;
DEBUG_BREAK_IF(baseAddress != (baseAddress & sshAlignmentMask));
DEBUG_BREAK_IF(baseAddress != (baseAddress & this->surfaceStateAlignmentMask));
offset = 0;
}
@@ -76,9 +75,8 @@ struct KernelHw : public KernelImp {
}
uint64_t bufferAddressForSsh = baseAddress;
auto alignment = NEO::EncodeSurfaceState<GfxFamily>::getSurfaceBaseAddressAlignment();
bufferSizeForSsh += misalignedSize;
bufferSizeForSsh = alignUp(bufferSizeForSsh, alignment);
bufferSizeForSsh = alignUp(bufferSizeForSsh, this->surfaceStateAlignment);
bool l3Enabled = true;
// Allocation MUST be cacheline (64 byte) aligned in order to enable L3 caching, otherwise heap corruption will occur coming from the KMD.
@@ -114,38 +112,6 @@ struct KernelHw : public KernelImp {
UNRECOVERABLE_IF(surfaceStateAddress == nullptr);
*reinterpret_cast<typename GfxFamily::RENDER_SURFACE_STATE *>(surfaceStateAddress) = surfaceState;
}
// Asks the GfxFamily-specific dispatch encoder whether local IDs have to be
// generated by the runtime for the current group size, and stores the answer
// in kernelRequiresGenerationOfLocalIdsByRuntime.
// requiredWorkgroupOrder is handed to the encoder unchanged; presumably the
// encoder writes the selected walk order into it - confirm against the callee.
void evaluateIfRequiresGenerationOfLocalIdsByRuntime(const NEO::KernelDescriptor &kernelDescriptor) override {
    const auto &attributes = kernelDescriptor.kernelAttributes;

    // The encoder consumes the local work size as size_t values, so widen the stored group size.
    size_t lws[3] = {this->groupSize[0], this->groupSize[1], this->groupSize[2]};

    const std::array<uint8_t, 3> walkOrder{{attributes.workgroupWalkOrder[0],
                                            attributes.workgroupWalkOrder[1],
                                            attributes.workgroupWalkOrder[2]}};

    kernelRequiresGenerationOfLocalIdsByRuntime =
        NEO::EncodeDispatchKernel<GfxFamily>::isRuntimeLocalIdsGenerationRequired(attributes.numLocalIdChannels,
                                                                                  lws,
                                                                                  walkOrder,
                                                                                  attributes.flags.requiresWorkgroupWalkOrder,
                                                                                  requiredWorkgroupOrder,
                                                                                  attributes.simdSize);
}
// Returns the combined size of cross-thread data and per-thread data for the
// whole thread group. When the kernel passes inline data, the walker's inline
// data size is subtracted from that sum, clamping the result at zero.
uint32_t getIndirectSize() const override {
    uint32_t payloadSize = getCrossThreadDataSize() + getPerThreadDataSizeForWholeThreadGroup();

    if (getKernelDescriptor().kernelAttributes.flags.passInlineData) {
        const auto inlineDataSize = GfxFamily::DefaultWalkerType::getInlineDataSize();
        // Clamp at zero instead of letting the unsigned subtraction wrap around.
        payloadSize = (payloadSize > inlineDataSize) ? static_cast<uint32_t>(payloadSize - inlineDataSize) : 0u;
    }

    return payloadSize;
}
};
} // namespace L0

View File

@@ -1078,6 +1078,9 @@ ze_result_t KernelImp::initialize(const ze_kernel_desc_t *desc) {
this->rcsAvailable = gfxHelper.isRcsAvailable(hwInfo);
this->cooperativeSupport = productHelper.isCooperativeEngineSupported(hwInfo);
this->walkerInlineDataSize = gfxHelper.getDefaultWalkerInlineDataSize();
this->surfaceStateAlignmentMask = gfxHelper.getSurfaceBaseAddressAlignmentMask();
this->surfaceStateAlignment = gfxHelper.getSurfaceBaseAddressAlignment();
if (isaAllocation->getAllocationType() == NEO::AllocationType::kernelIsaInternal && this->kernelImmData->getIsaParentAllocation() == nullptr) {
isaAllocation->setTbxWritable(true, std::numeric_limits<uint32_t>::max());
@@ -1621,4 +1624,39 @@ KernelExt *KernelImp::getExtension(uint32_t extensionType) {
return nullptr;
}
// Returns the combined size of cross-thread data and per-thread data for the
// whole thread group. When the kernel passes inline data, walkerInlineDataSize
// is subtracted from that sum, clamping the result at zero.
uint32_t KernelImp::getIndirectSize() const {
    const uint32_t payloadSize = getCrossThreadDataSize() + getPerThreadDataSizeForWholeThreadGroup();

    if (!getKernelDescriptor().kernelAttributes.flags.passInlineData) {
        return payloadSize;
    }

    // Clamp at zero instead of letting the unsigned subtraction wrap around.
    const uint32_t inlineDataSize = this->walkerInlineDataSize;
    return (payloadSize > inlineDataSize) ? (payloadSize - inlineDataSize) : 0u;
}
// Asks the device's GfxCoreHelper whether local IDs have to be generated by
// the runtime for the current group size, and stores the answer in
// kernelRequiresGenerationOfLocalIdsByRuntime.
// requiredWorkgroupOrder is handed to the helper unchanged; presumably the
// helper writes the selected walk order into it - confirm against the callee.
void KernelImp::evaluateIfRequiresGenerationOfLocalIdsByRuntime(const NEO::KernelDescriptor &kernelDescriptor) {
    auto &rootDeviceEnvironment = module->getDevice()->getNEODevice()->getRootDeviceEnvironment();
    auto &coreHelper = rootDeviceEnvironment.getHelper<NEO::GfxCoreHelper>();

    const auto &attributes = kernelDescriptor.kernelAttributes;

    // The helper consumes the local work size as size_t values, so widen the stored group size.
    size_t lws[3] = {this->groupSize[0], this->groupSize[1], this->groupSize[2]};

    std::array<uint8_t, 3> walkOrder{{attributes.workgroupWalkOrder[0],
                                      attributes.workgroupWalkOrder[1],
                                      attributes.workgroupWalkOrder[2]}};

    kernelRequiresGenerationOfLocalIdsByRuntime =
        coreHelper.isRuntimeLocalIdsGenerationRequired(attributes.numLocalIdChannels,
                                                       lws,
                                                       walkOrder,
                                                       attributes.flags.requiresWorkgroupWalkOrder,
                                                       requiredWorkgroupOrder,
                                                       attributes.simdSize);
}
} // namespace L0

View File

@@ -239,6 +239,8 @@ struct KernelImp : Kernel {
return kernelArgInfos;
}
uint32_t getIndirectSize() const override;
protected:
KernelImp() = default;
@@ -248,7 +250,7 @@ struct KernelImp : Kernel {
void createPrintfBuffer();
void setAssertBuffer();
virtual void evaluateIfRequiresGenerationOfLocalIdsByRuntime(const NEO::KernelDescriptor &kernelDescriptor) = 0;
MOCKABLE_VIRTUAL void evaluateIfRequiresGenerationOfLocalIdsByRuntime(const NEO::KernelDescriptor &kernelDescriptor);
void *patchBindlessSurfaceState(NEO::GraphicsAllocation *alloc, uint32_t bindless);
uint32_t getSurfaceStateIndexForBindlessOffset(NEO::CrossThreadDataOffset bindlessOffset) const;
ze_result_t validateWorkgroupSize() const;
@@ -268,6 +270,8 @@ struct KernelImp : Kernel {
NEO::GraphicsAllocation *printfBuffer = nullptr;
size_t syncBufferIndex = std::numeric_limits<size_t>::max();
size_t regionGroupBarrierIndex = std::numeric_limits<size_t>::max();
uintptr_t surfaceStateAlignmentMask = 0;
uintptr_t surfaceStateAlignment = 0;
uint32_t groupSize[3] = {0u, 0u, 0u};
uint32_t numThreadsPerThreadGroup = 1u;
@@ -286,6 +290,7 @@ struct KernelImp : Kernel {
uint32_t perThreadDataSizeForWholeThreadGroupAllocated = 0;
uint32_t perThreadDataSizeForWholeThreadGroup = 0u;
uint32_t perThreadDataSize = 0u;
uint32_t walkerInlineDataSize = 0;
UnifiedMemoryControls unifiedMemoryControls;
std::vector<uint32_t> slmArgSizes;