refactor: split sync buffer and region allocation creation code

- separate the allocation code from the command list and the kernel
- allow the allocation code to be called from any part of the driver

Related-To: NEO-13350

Signed-off-by: Zbigniew Zdanowicz <zbigniew.zdanowicz@intel.com>
This commit is contained in:
Zbigniew Zdanowicz
2024-11-28 09:59:31 +00:00
committed by Compute-Runtime-Automation
parent f2b0dad964
commit c5ed6bf73c
8 changed files with 71 additions and 36 deletions

View File

@@ -11,12 +11,10 @@
#include "shared/source/device/device.h"
#include "shared/source/execution_environment/root_device_environment.h"
#include "shared/source/helpers/basic_math.h"
#include "shared/source/helpers/constants.h"
#include "shared/source/helpers/debug_helpers.h"
#include "shared/source/helpers/gfx_core_helper.h"
#include "shared/source/helpers/hw_info.h"
#include <algorithm>
#include "shared/source/program/sync_buffer_handler.h"
namespace NEO {
@@ -125,4 +123,20 @@ bool KernelHelper::isAnyArgumentPtrByValue(const KernelDescriptor &kernelDescrip
return false;
}
// Obtains an allocation/offset pair from the device's sync buffer handler,
// sized by getRegionGroupBarrierSize(threadGroupCount, localRegionSize).
//
// Returns whatever SyncBufferHandler::obtainAllocationAndOffset yields for
// that size: a GraphicsAllocation pointer paired with a size_t value.
std::pair<GraphicsAllocation *, size_t> KernelHelper::getRegionGroupBarrierAllocationOffset(Device &device, const size_t threadGroupCount, const size_t localRegionSize) {
    // NOTE(review): presumably creates device.syncBufferHandler when it does
    // not exist yet - confirm in Device; the handler is dereferenced below.
    device.allocateSyncBufferHandler();

    const size_t barrierBytes = KernelHelper::getRegionGroupBarrierSize(threadGroupCount, localRegionSize);
    return device.syncBufferHandler->obtainAllocationAndOffset(barrierBytes);
}
// Obtains an allocation/offset pair from the device's sync buffer handler,
// sized by getSyncBufferSize(requestedNumberOfWorkgroups).
//
// Returns whatever SyncBufferHandler::obtainAllocationAndOffset yields for
// that size: a GraphicsAllocation pointer paired with a size_t value.
std::pair<GraphicsAllocation *, size_t> KernelHelper::getSyncBufferAllocationOffset(Device &device, const size_t requestedNumberOfWorkgroups) {
    // NOTE(review): presumably creates device.syncBufferHandler when it does
    // not exist yet - confirm in Device; the handler is dereferenced below.
    device.allocateSyncBufferHandler();

    const size_t syncBufferBytes = KernelHelper::getSyncBufferSize(requestedNumberOfWorkgroups);
    return device.syncBufferHandler->obtainAllocationAndOffset(syncBufferBytes);
}
} // namespace NEO

View File

@@ -7,14 +7,18 @@
#pragma once
#include "shared/source/helpers/aligned_memory.h"
#include "shared/source/helpers/constants.h"
#include "shared/source/helpers/definitions/engine_group_types.h"
#include "shared/source/kernel/kernel_descriptor.h"
#include <algorithm>
#include <cstddef>
#include <cstdint>
namespace NEO {
class Device;
class GraphicsAllocation;
struct RootDeviceEnvironment;
struct KernelHelper {
@@ -39,6 +43,17 @@ struct KernelHelper {
static ErrorCode checkIfThereIsSpaceForScratchOrPrivate(KernelDescriptor::KernelAttributes attributes, Device *device);
static bool isAnyArgumentPtrByValue(const KernelDescriptor &kernelDescriptor);
// Computes the region group barrier size:
//   (threadGroupCount / localRegionSize) * (localRegionSize + 1) * 2 * sizeof(uint32_t)
// rounded up with alignUp to MemoryConstants::cacheLineSize.
//
// The division is an integer division, so any remainder of
// threadGroupCount / localRegionSize is discarded, and localRegionSize must
// be non-zero.
static inline size_t getRegionGroupBarrierSize(const size_t threadGroupCount, const size_t localRegionSize) {
    const size_t regionCount = threadGroupCount / localRegionSize;
    const size_t entriesPerRegion = localRegionSize + 1;
    const size_t unalignedBytes = regionCount * entriesPerRegion * 2 * sizeof(uint32_t);
    return alignUp(unalignedBytes, MemoryConstants::cacheLineSize);
}
static std::pair<GraphicsAllocation *, size_t> getRegionGroupBarrierAllocationOffset(Device &device, const size_t threadGroupCount, const size_t localRegionSize);
// Computes the sync buffer size for the requested number of workgroups:
// the larger of requestedNumberOfWorkgroups and
// CommonConstants::minimalSyncBufferSize, rounded up with alignUp to
// CommonConstants::maximalSizeOfAtomicType.
static inline size_t getSyncBufferSize(const size_t requestedNumberOfWorkgroups) {
    const size_t lowerBound = static_cast<size_t>(CommonConstants::minimalSyncBufferSize);
    const size_t alignment = static_cast<size_t>(CommonConstants::maximalSizeOfAtomicType);
    const size_t unalignedSize = std::max(requestedNumberOfWorkgroups, lowerBound);
    return alignUp(unalignedSize, alignment);
}
static std::pair<GraphicsAllocation *, size_t> getSyncBufferAllocationOffset(Device &device, const size_t requestedNumberOfWorkgroups);
};
} // namespace NEO