mirror of
https://github.com/intel/compute-runtime.git
synced 2025-12-20 17:00:59 +08:00
So far, a separate page has been allocated for each kernel's ISA within `KernelImmutableData::initialize()`. Apparently the ISA blocks are often much smaller than a 64k page, which leads to poor memory utilization and was even observed to cause a device OOM error if a single module has several keys. Improve the situation by reusing the parent allocation (owned by the module instance) for modules whose kernel ISAs can fit together within a single 64k page. This improves the memory utilization on a single-module level. Related-To: NEO-7788 Signed-off-by: Maciej Bielski <maciej.bielski@intel.com>
53 lines
1.6 KiB
C++
53 lines
1.6 KiB
C++
/*
 * Copyright (C) 2020-2023 Intel Corporation
 *
 * SPDX-License-Identifier: MIT
 *
 */
|
|
|
|
#pragma once
|
|
#include <cstdint>
|
|
|
|
namespace NEO {
// Only referenced through pointers/references below, so forward declarations suffice.
class GraphicsAllocation;
struct ImplicitArgs;
struct KernelDescriptor;

// Policy values reported by DispatchKernelEncoderI::getSlmPolicy().
// Enumerators carry no explicit values, so they are numbered 0, 1, 2 in
// declaration order.
enum class SlmPolicy {
    SlmPolicyNone,
    SlmPolicyLargeSlm,
    SlmPolicyLargeData
};

// Abstract interface of accessors describing a kernel for dispatch encoding.
//
// Every member function is pure virtual and const-qualified; the virtual
// destructor allows implementations to be destroyed through a pointer to this
// interface. Ownership and lifetime of the returned pointers are not expressed
// by the types here - NOTE(review): confirm against the implementing classes.
struct DispatchKernelEncoderI {
    virtual ~DispatchKernelEncoderI() = default;

    virtual const KernelDescriptor &getKernelDescriptor() const = 0;
    // NOTE(review): element count of the returned array is not visible here
    // (presumably one entry per dimension) - confirm with implementers.
    virtual const uint32_t *getGroupSize() const = 0;
    virtual uint32_t getSlmTotalSize() const = 0;
    virtual SlmPolicy getSlmPolicy() const = 0;

    // Cross-thread data buffer and its size (units presumably bytes - TODO confirm).
    virtual const uint8_t *getCrossThreadData() const = 0;
    virtual uint32_t getCrossThreadDataSize() const = 0;

    virtual uint32_t getThreadExecutionMask() const = 0;
    virtual uint32_t getNumThreadsPerThreadGroup() const = 0;
    // Per-thread data buffer with two size queries: one plain, one for the
    // whole thread group.
    virtual const uint8_t *getPerThreadData() const = 0;
    virtual uint32_t getPerThreadDataSize() const = 0;
    virtual uint32_t getPerThreadDataSizeForWholeThreadGroup() const = 0;

    virtual const uint8_t *getSurfaceStateHeapData() const = 0;
    virtual uint32_t getSurfaceStateHeapDataSize() const = 0;

    virtual GraphicsAllocation *getIsaAllocation() const = 0;
    // NOTE(review): presumably the offset of this kernel's ISA inside the
    // allocation returned by getIsaAllocation() when that allocation is
    // shared with a parent (module-level) allocation - confirm with implementers.
    virtual uint64_t getIsaOffsetInParentAllocation() const = 0;
    virtual const uint8_t *getDynamicStateHeapData() const = 0;

    virtual uint32_t getRequiredWorkgroupOrder() const = 0;
    virtual bool requiresGenerationOfLocalIdsByRuntime() const = 0;

    virtual ImplicitArgs *getImplicitArgs() const = 0;
    // Declared const even though the name suggests patching; what is modified
    // (if anything) is up to the implementation - TODO confirm.
    virtual void patchBindlessOffsetsInCrossThreadData(uint64_t bindlessSurfaceStateBaseOffset) const = 0;
};
} // namespace NEO
|