mirror of https://github.com/intel/compute-runtime.git
refactor: add kernel properties and move implementations to imp class
Related-To: NEO-15374
Signed-off-by: Zbigniew Zdanowicz <zbigniew.zdanowicz@intel.com>
committed by Compute-Runtime-Automation
parent 377b99e741
commit ef5efeac0c

In brief: the commit caches three platform-derived kernel properties (walkerInlineDataSize, surfaceStateAlignmentMask, surfaceStateAlignment) on KernelImp during initialize(), and moves getIndirectSize() and evaluateIfRequiresGenerationOfLocalIdsByRuntime() from the per-GfxFamily KernelHw template into KernelImp, where they use the cached properties instead of templated EncodeSurfaceState / EncodeDispatchKernel calls.
@@ -32,10 +32,9 @@ struct KernelHw : public KernelImp {

     void setBufferSurfaceState(uint32_t argIndex, void *address, NEO::GraphicsAllocation *alloc) override {
         uint64_t baseAddress = alloc->getGpuAddressToPatch();
-        auto sshAlignmentMask = NEO::EncodeSurfaceState<GfxFamily>::getSurfaceBaseAddressAlignmentMask();

         // Remove misaligned bytes, accounted for in bufferOffset patch token
-        baseAddress &= sshAlignmentMask;
+        baseAddress &= this->surfaceStateAlignmentMask;
         auto misalignedSize = ptrDiff(alloc->getGpuAddressToPatch(), baseAddress);
         auto offset = ptrDiff(address, reinterpret_cast<void *>(baseAddress));
         size_t bufferSizeForSsh = alloc->getUnderlyingBufferSize();
@@ -52,7 +51,7 @@ struct KernelHw : public KernelImp {
             offsetedAddress = baseAddress != reinterpret_cast<uintptr_t>(address);
             baseAddress = reinterpret_cast<uintptr_t>(address);
             bufferSizeForSsh -= offset;
-            DEBUG_BREAK_IF(baseAddress != (baseAddress & sshAlignmentMask));
+            DEBUG_BREAK_IF(baseAddress != (baseAddress & this->surfaceStateAlignmentMask));

             offset = 0;
         }
@@ -76,9 +75,8 @@ struct KernelHw : public KernelImp {
         }

         uint64_t bufferAddressForSsh = baseAddress;
-        auto alignment = NEO::EncodeSurfaceState<GfxFamily>::getSurfaceBaseAddressAlignment();
         bufferSizeForSsh += misalignedSize;
-        bufferSizeForSsh = alignUp(bufferSizeForSsh, alignment);
+        bufferSizeForSsh = alignUp(bufferSizeForSsh, this->surfaceStateAlignment);

         bool l3Enabled = true;
         // Allocation MUST be cacheline (64 byte) aligned in order to enable L3 caching otherwise Heap corruption will occur coming from the KMD.
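
To make the masking arithmetic above concrete, here is a minimal standalone sketch, assuming a 64-byte surface-state alignment. The constants are stand-ins for the cached kernel properties set in KernelImp::initialize(); the real values come from the gfx helper.

#include <cassert>
#include <cstdint>
#include <cstdio>

// Hypothetical stand-ins for the cached properties; the real values are
// queried from the gfx helper in KernelImp::initialize().
constexpr uint64_t surfaceStateAlignment = 64; // assumed cacheline alignment
constexpr uint64_t surfaceStateAlignmentMask = ~(surfaceStateAlignment - 1);

constexpr uint64_t alignUp(uint64_t value, uint64_t alignment) {
    return (value + alignment - 1) & ~(alignment - 1);
}

int main() {
    uint64_t gpuAddress = 0x10000035; // deliberately misaligned by 0x35 bytes
    uint64_t bufferSize = 256;

    // Mask off the misaligned low bits, as setBufferSurfaceState does;
    // the dropped bytes are accounted for via the bufferOffset patch token.
    uint64_t baseAddress = gpuAddress & surfaceStateAlignmentMask; // 0x10000000
    uint64_t misalignedSize = gpuAddress - baseAddress;            // 0x35 = 53

    // The surface state must still cover the misaligned prefix, so the size
    // grows by misalignedSize and is rounded up to the alignment.
    uint64_t bufferSizeForSsh = alignUp(bufferSize + misalignedSize, surfaceStateAlignment);

    assert(baseAddress == (baseAddress & surfaceStateAlignmentMask));
    printf("base=0x%llx misaligned=%llu sshSize=%llu\n",
           (unsigned long long)baseAddress,
           (unsigned long long)misalignedSize,
           (unsigned long long)bufferSizeForSsh); // base=0x10000000 misaligned=53 sshSize=320
    return 0;
}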
@@ -114,38 +112,6 @@ struct KernelHw : public KernelImp {
         UNRECOVERABLE_IF(surfaceStateAddress == nullptr);
         *reinterpret_cast<typename GfxFamily::RENDER_SURFACE_STATE *>(surfaceStateAddress) = surfaceState;
     }
-
-    void evaluateIfRequiresGenerationOfLocalIdsByRuntime(const NEO::KernelDescriptor &kernelDescriptor) override {
-        size_t localWorkSizes[3];
-        localWorkSizes[0] = this->groupSize[0];
-        localWorkSizes[1] = this->groupSize[1];
-        localWorkSizes[2] = this->groupSize[2];
-
-        kernelRequiresGenerationOfLocalIdsByRuntime = NEO::EncodeDispatchKernel<GfxFamily>::isRuntimeLocalIdsGenerationRequired(
-            kernelDescriptor.kernelAttributes.numLocalIdChannels,
-            localWorkSizes,
-            std::array<uint8_t, 3>{
-                {kernelDescriptor.kernelAttributes.workgroupWalkOrder[0],
-                 kernelDescriptor.kernelAttributes.workgroupWalkOrder[1],
-                 kernelDescriptor.kernelAttributes.workgroupWalkOrder[2]}},
-            kernelDescriptor.kernelAttributes.flags.requiresWorkgroupWalkOrder,
-            requiredWorkgroupOrder,
-            kernelDescriptor.kernelAttributes.simdSize);
-    }
-
-    uint32_t getIndirectSize() const override {
-        uint32_t totalPayloadSize = getCrossThreadDataSize() + getPerThreadDataSizeForWholeThreadGroup();
-
-        if (getKernelDescriptor().kernelAttributes.flags.passInlineData) {
-            if (totalPayloadSize > GfxFamily::DefaultWalkerType::getInlineDataSize()) {
-                totalPayloadSize -= GfxFamily::DefaultWalkerType::getInlineDataSize();
-            } else {
-                totalPayloadSize = 0;
-            }
-        }
-
-        return totalPayloadSize;
-    }
 };

 } // namespace L0
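
The getIndirectSize() override removed here reappears later in this commit on KernelImp, with the templated GfxFamily::DefaultWalkerType::getInlineDataSize() call replaced by the cached walkerInlineDataSize property. The clamped-subtraction arithmetic is identical in both versions; a self-contained restatement:

#include <cstdint>
#include <cstdio>

// Standalone restatement of the getIndirectSize() arithmetic: when the kernel
// passes its leading payload inline in the walker command, that portion does
// not consume indirect-heap space, so it is subtracted with a clamp at zero.
uint32_t computeIndirectSize(uint32_t crossThreadDataSize,
                             uint32_t perThreadDataSizeForWholeThreadGroup,
                             bool passInlineData,
                             uint32_t walkerInlineDataSize) {
    uint32_t totalPayloadSize = crossThreadDataSize + perThreadDataSizeForWholeThreadGroup;
    if (passInlineData) {
        totalPayloadSize = totalPayloadSize > walkerInlineDataSize
                               ? totalPayloadSize - walkerInlineDataSize
                               : 0;
    }
    return totalPayloadSize;
}

int main() {
    // 96B cross-thread + 128B per-thread payload, 64B carried inline: 160B remain.
    printf("%u\n", computeIndirectSize(96, 128, true, 64));
    // A payload smaller than the inline capacity clamps to zero rather than underflowing.
    printf("%u\n", computeIndirectSize(32, 0, true, 64));
    return 0;
}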
@@ -1078,6 +1078,9 @@ ze_result_t KernelImp::initialize(const ze_kernel_desc_t *desc) {

     this->rcsAvailable = gfxHelper.isRcsAvailable(hwInfo);
     this->cooperativeSupport = productHelper.isCooperativeEngineSupported(hwInfo);
+    this->walkerInlineDataSize = gfxHelper.getDefaultWalkerInlineDataSize();
+    this->surfaceStateAlignmentMask = gfxHelper.getSurfaceBaseAddressAlignmentMask();
+    this->surfaceStateAlignment = gfxHelper.getSurfaceBaseAddressAlignment();

     if (isaAllocation->getAllocationType() == NEO::AllocationType::kernelIsaInternal && this->kernelImmData->getIsaParentAllocation() == nullptr) {
         isaAllocation->setTbxWritable(true, std::numeric_limits<uint32_t>::max());
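
These three added lines are the heart of the refactor: the per-platform answers are fetched once from the helpers at initialization and stored as plain members. A minimal sketch of the pattern, with a hypothetical GfxHelperStub standing in for NEO::GfxCoreHelper and assumed return values:

#include <cstdint>
#include <cstdio>

// Hypothetical helper standing in for NEO::GfxCoreHelper; only the three
// queries used by this commit are modeled, with assumed return values.
struct GfxHelperStub {
    uint64_t getSurfaceBaseAddressAlignment() const { return 64; }
    uint64_t getSurfaceBaseAddressAlignmentMask() const { return ~uint64_t(63); }
    uint32_t getDefaultWalkerInlineDataSize() const { return 64; }
};

struct KernelStub {
    uint64_t surfaceStateAlignmentMask = 0;
    uint64_t surfaceStateAlignment = 0;
    uint32_t walkerInlineDataSize = 0;

    // Mirrors the shape of KernelImp::initialize(): query the helper once and
    // keep the results as plain members, so later calls such as
    // setBufferSurfaceState() and getIndirectSize() read a field instead of
    // going through a templated, per-GfxFamily interface.
    void initialize(const GfxHelperStub &gfxHelper) {
        walkerInlineDataSize = gfxHelper.getDefaultWalkerInlineDataSize();
        surfaceStateAlignmentMask = gfxHelper.getSurfaceBaseAddressAlignmentMask();
        surfaceStateAlignment = gfxHelper.getSurfaceBaseAddressAlignment();
    }
};

int main() {
    GfxHelperStub gfxHelper;
    KernelStub kernel;
    kernel.initialize(gfxHelper);
    printf("alignment=%llu inlineDataSize=%u\n",
           (unsigned long long)kernel.surfaceStateAlignment,
           kernel.walkerInlineDataSize);
    return 0;
}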
@@ -1621,4 +1624,39 @@ KernelExt *KernelImp::getExtension(uint32_t extensionType) {
     return nullptr;
 }
+
+uint32_t KernelImp::getIndirectSize() const {
+    uint32_t totalPayloadSize = getCrossThreadDataSize() + getPerThreadDataSizeForWholeThreadGroup();
+
+    if (getKernelDescriptor().kernelAttributes.flags.passInlineData) {
+        if (totalPayloadSize > this->walkerInlineDataSize) {
+            totalPayloadSize -= this->walkerInlineDataSize;
+        } else {
+            totalPayloadSize = 0;
+        }
+    }
+
+    return totalPayloadSize;
+}
+
+void KernelImp::evaluateIfRequiresGenerationOfLocalIdsByRuntime(const NEO::KernelDescriptor &kernelDescriptor) {
+    auto &gfxHelper = module->getDevice()->getNEODevice()->getRootDeviceEnvironment().getHelper<NEO::GfxCoreHelper>();
+
+    size_t localWorkSizes[3];
+    localWorkSizes[0] = this->groupSize[0];
+    localWorkSizes[1] = this->groupSize[1];
+    localWorkSizes[2] = this->groupSize[2];
+
+    std::array<uint8_t, 3> kernelWalkOrder{
+        kernelDescriptor.kernelAttributes.workgroupWalkOrder[0],
+        kernelDescriptor.kernelAttributes.workgroupWalkOrder[1],
+        kernelDescriptor.kernelAttributes.workgroupWalkOrder[2]};
+
+    kernelRequiresGenerationOfLocalIdsByRuntime = gfxHelper.isRuntimeLocalIdsGenerationRequired(kernelDescriptor.kernelAttributes.numLocalIdChannels,
+                                                                                                localWorkSizes,
+                                                                                                kernelWalkOrder,
+                                                                                                kernelDescriptor.kernelAttributes.flags.requiresWorkgroupWalkOrder,
+                                                                                                requiredWorkgroupOrder,
+                                                                                                kernelDescriptor.kernelAttributes.simdSize);
+}

 } // namespace L0
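
The decision itself now goes through gfxHelper.isRuntimeLocalIdsGenerationRequired() rather than the templated NEO::EncodeDispatchKernel<GfxFamily> call. The real heuristic is hardware-specific; purely to illustrate the call's shape, here is a simplified, hypothetical stand-in (not NEO's actual rule):

#include <array>
#include <cstddef>
#include <cstdint>
#include <cstdio>

// Illustrative stand-in only: the real decision lives in
// NEO::GfxCoreHelper::isRuntimeLocalIdsGenerationRequired and is hardware
// specific. This sketch keeps the parameter shape and one plausible rule:
// fall back to runtime generation when the kernel insists on a walk order
// other than the default {0, 1, 2}.
bool isRuntimeLocalIdsGenerationRequiredSketch(uint32_t numLocalIdChannels,
                                               const size_t (&localWorkSizes)[3],
                                               const std::array<uint8_t, 3> &walkOrder,
                                               bool requiresWorkgroupWalkOrder) {
    if (numLocalIdChannels == 0) {
        return false; // kernel consumes no local IDs at all
    }
    (void)localWorkSizes; // a real heuristic would also inspect the sizes
    constexpr std::array<uint8_t, 3> defaultOrder{0, 1, 2};
    return requiresWorkgroupWalkOrder && walkOrder != defaultOrder;
}

int main() {
    size_t localWorkSizes[3] = {8, 8, 1};
    printf("%d\n", isRuntimeLocalIdsGenerationRequiredSketch(3, localWorkSizes, {{2, 1, 0}}, true));  // 1
    printf("%d\n", isRuntimeLocalIdsGenerationRequiredSketch(3, localWorkSizes, {{0, 1, 2}}, false)); // 0
    return 0;
}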
@@ -239,6 +239,8 @@ struct KernelImp : Kernel {
         return kernelArgInfos;
     }

+    uint32_t getIndirectSize() const override;
+
   protected:
     KernelImp() = default;

@@ -248,7 +250,7 @@ struct KernelImp : Kernel {

     void createPrintfBuffer();
     void setAssertBuffer();
-    virtual void evaluateIfRequiresGenerationOfLocalIdsByRuntime(const NEO::KernelDescriptor &kernelDescriptor) = 0;
+    MOCKABLE_VIRTUAL void evaluateIfRequiresGenerationOfLocalIdsByRuntime(const NEO::KernelDescriptor &kernelDescriptor);
     void *patchBindlessSurfaceState(NEO::GraphicsAllocation *alloc, uint32_t bindless);
     uint32_t getSurfaceStateIndexForBindlessOffset(NEO::CrossThreadDataOffset bindlessOffset) const;
     ze_result_t validateWorkgroupSize() const;
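
Note the hook change here: the method is no longer pure virtual on KernelImp but MOCKABLE_VIRTUAL, so the base class owns the single implementation while unit tests can still substitute a mock. As a sketch of the idiom (NEO defines its own variant in the shared helpers; the build flag below is hypothetical):

// The MOCKABLE_VIRTUAL idiom: expand to `virtual` only in test builds, so
// unit tests can override the method through a mock, while release builds
// keep a plain, devirtualizable member function.
#if defined(NEO_UNIT_TESTS) // hypothetical build flag for illustration
#define MOCKABLE_VIRTUAL virtual
#else
#define MOCKABLE_VIRTUAL
#endif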
@@ -268,6 +270,8 @@ struct KernelImp : Kernel {
     NEO::GraphicsAllocation *printfBuffer = nullptr;
     size_t syncBufferIndex = std::numeric_limits<size_t>::max();
     size_t regionGroupBarrierIndex = std::numeric_limits<size_t>::max();
+    uintptr_t surfaceStateAlignmentMask = 0;
+    uintptr_t surfaceStateAlignment = 0;

     uint32_t groupSize[3] = {0u, 0u, 0u};
     uint32_t numThreadsPerThreadGroup = 1u;
@@ -286,6 +290,7 @@ struct KernelImp : Kernel {
     uint32_t perThreadDataSizeForWholeThreadGroupAllocated = 0;
     uint32_t perThreadDataSizeForWholeThreadGroup = 0u;
     uint32_t perThreadDataSize = 0u;
+    uint32_t walkerInlineDataSize = 0;

     UnifiedMemoryControls unifiedMemoryControls;
     std::vector<uint32_t> slmArgSizes;