Files
compute-runtime/shared/source/kernel/dispatch_kernel_encoder_interface.h
Maciej Bielski 97e7cda912 feature: Optimize intra-module kernel ISA allocations
So far, there is a separate page allocated for each kernel's ISA within
`KernelImmutableData::initialize()`. Apparently the ISA blocks are often
much smaller than a 64k page, which leads to poor memory utilization and
was even observed to cause a device OOM error if a single module has
several kernels.

Improve the situation by reusing the parent allocation (owned by the
module instance) for modules whose kernel ISAs can fit together within
a single 64k page. This improves memory utilization at the level of a
single module.

Related-To: NEO-7788
Signed-off-by: Maciej Bielski <maciej.bielski@intel.com>
2023-09-21 13:55:45 +02:00

53 lines
1.6 KiB
C++

/*
* Copyright (C) 2020-2023 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#pragma once
#include <cstdint>
namespace NEO {
class GraphicsAllocation;
struct ImplicitArgs;
struct KernelDescriptor;
// Policy selector reported by DispatchKernelEncoderI::getSlmPolicy().
// The numeric values are spelled out explicitly; they match the implicit
// 0, 1, 2 numbering an unannotated enumerator list would receive.
enum class SlmPolicy {
    SlmPolicyNone = 0,
    SlmPolicyLargeSlm = 1,
    SlmPolicyLargeData = 2,
};
// Abstract interface: apart from the defaulted virtual destructor, every member
// below is a pure-virtual const method, so a concrete implementer must override
// all of them.
// NOTE(review): this header shows signatures only. Notes on individual members
// are inferred from names and types and should be confirmed against implementers.
struct DispatchKernelEncoderI {
// Virtual so implementers can be destroyed through a pointer to this interface.
virtual ~DispatchKernelEncoderI() = default;
virtual const KernelDescriptor &getKernelDescriptor() const = 0;
// Returned as a pointer to uint32_t; the element count is not stated here
// (presumably one entry per dispatch dimension — TODO confirm).
virtual const uint32_t *getGroupSize() const = 0;
virtual uint32_t getSlmTotalSize() const = 0;
virtual SlmPolicy getSlmPolicy() const = 0;
// The data getters below return raw byte pointers, each paired with a separate
// size getter. Ownership and lifetime of the pointed-to memory are not
// specified in this header.
virtual const uint8_t *getCrossThreadData() const = 0;
virtual uint32_t getCrossThreadDataSize() const = 0;
virtual uint32_t getThreadExecutionMask() const = 0;
virtual uint32_t getNumThreadsPerThreadGroup() const = 0;
virtual const uint8_t *getPerThreadData() const = 0;
virtual uint32_t getPerThreadDataSize() const = 0;
virtual uint32_t getPerThreadDataSizeForWholeThreadGroup() const = 0;
virtual const uint8_t *getSurfaceStateHeapData() const = 0;
virtual uint32_t getSurfaceStateHeapDataSize() const = 0;
virtual GraphicsAllocation *getIsaAllocation() const = 0;
// NOTE(review): presumably the offset of this kernel's ISA inside a shared
// parent allocation; units (bytes?) are not stated here — confirm.
virtual uint64_t getIsaOffsetInParentAllocation() const = 0;
// Unlike the other data getters, no matching size getter for the dynamic
// state heap is declared in this interface.
virtual const uint8_t *getDynamicStateHeapData() const = 0;
virtual uint32_t getRequiredWorkgroupOrder() const = 0;
virtual bool requiresGenerationOfLocalIdsByRuntime() const = 0;
virtual ImplicitArgs *getImplicitArgs() const = 0;
// Declared const even though the name suggests it patches data.
// NOTE(review): how implementers perform the write under a const qualifier is
// not visible from this header — confirm.
virtual void patchBindlessOffsetsInCrossThreadData(uint64_t bindlessSurfaceStateBaseOffset) const = 0;
};
} // namespace NEO