mirror of
https://github.com/intel/compute-runtime.git
synced 2025-12-28 00:03:14 +08:00
Each host function gets a unique ID within a CSR. One MI store writes the ID to signal that the host function is ready, and one MI semaphore wait waits for the ID to be cleared. The 0th bit of the ID is used as a pending/completed flag: host function IDs start at 1 and are incremented by 2, so every ID always has bit 0 set. This is a must-have, since a semaphore wait can wait on 4 bytes only. Adjust command buffer programming and patching logic to use IDs. Add a hostFunction callable class, using an invoke method, which stores the required information about the callback. Add a host function streamer, which stores all host function data for a given CSR. All user-provided host functions are stored in an unordered map, where the key is the host function ID. Add a host function scheduler and a thread pool, under a debug flag. The single-threaded scheduler loops over all registered host function streamers and dispatches ready-to-execute host functions to the thread pool. Allow out-of-order host function execution for OOQ, under a debug flag: each host function has a bool isInOrder flag which indicates whether it can be executed out of order; in this mode the ID tag is cleared immediately, so the semaphore wait unblocks before the host function executes. Remove the host function worker CV- and atomics-based implementations. Rename classes. Related-To: NEO-14577 Signed-off-by: Kamil Kopryk <kamil.kopryk@intel.com>
159 lines
5.4 KiB
C++
159 lines
5.4 KiB
C++
/*
 * Copyright (C) 2025 Intel Corporation
 *
 * SPDX-License-Identifier: MIT
 *
 */
|
|
|
|
#include "shared/source/command_stream/host_function.h"
|
|
|
|
#include "shared/source/command_stream/command_stream_receiver.h"
|
|
#include "shared/source/command_stream/host_function_interface.h"
|
|
#include "shared/source/command_stream/host_function_scheduler.h"
|
|
#include "shared/source/command_stream/host_function_worker_counting_semaphore.h"
|
|
#include "shared/source/debug_settings/debug_settings_manager.h"
|
|
#include "shared/source/execution_environment/root_device_environment.h"
|
|
#include "shared/source/memory_manager/graphics_allocation.h"
|
|
|
|
namespace NEO {
|
|
// Creates a streamer around a host function ID slot.
//   allocation             - graphics allocation handed back by getHostFunctionIdAllocation()
//   hostFunctionIdAddress  - address of the 64-bit ID slot; accessed as volatile
//   downloadAllocationImpl - callback invoked on the allocation when isTbx is set
//   isTbx                  - enables the download callback in downloadHostFunctionAllocation()
HostFunctionStreamer::HostFunctionStreamer(GraphicsAllocation *allocation,
                                           void *hostFunctionIdAddress,
                                           const std::function<void(GraphicsAllocation &)> &downloadAllocationImpl,
                                           bool isTbx)
    : hostFunctionIdAddress(static_cast<volatile uint64_t *>(hostFunctionIdAddress)),
      allocation(allocation),
      downloadAllocationImpl(downloadAllocationImpl),
      nextHostFunctionId(1), // first ID is 1, bit 0 is reserved for the pending/completed status
      isTbx(isTbx) {
}
|
|
|
|
uint64_t HostFunctionStreamer::getHostFunctionIdGpuAddress() const {
|
|
return reinterpret_cast<uint64_t>(hostFunctionIdAddress);
|
|
}
|
|
|
|
volatile uint64_t *HostFunctionStreamer::getHostFunctionIdPtr() const {
|
|
return hostFunctionIdAddress;
|
|
}
|
|
|
|
uint64_t HostFunctionStreamer::getNextHostFunctionIdAndIncrement() {
|
|
// increment by 2 to keep 0 bit for pending/completed status
|
|
return nextHostFunctionId.fetch_add(2, std::memory_order_acq_rel);
|
|
}
|
|
|
|
uint64_t HostFunctionStreamer::getHostFunctionId() const {
|
|
return *hostFunctionIdAddress;
|
|
}
|
|
|
|
void HostFunctionStreamer::signalHostFunctionCompletion(const HostFunction &hostFunction) {
|
|
if (hostFunction.isInOrder) {
|
|
*hostFunctionIdAddress = HostFunctionStatus::completed;
|
|
isBusy.store(false, std::memory_order_release);
|
|
}
|
|
}
|
|
|
|
void HostFunctionStreamer::prepareForExecution(const HostFunction &hostFunction) {
|
|
if (hostFunction.isInOrder) {
|
|
isBusy.store(true, std::memory_order_release);
|
|
} else {
|
|
*hostFunctionIdAddress = HostFunctionStatus::completed;
|
|
}
|
|
|
|
pendingHostFunctions.fetch_sub(1, std::memory_order_acq_rel);
|
|
}
|
|
|
|
// Removes and returns the host function registered under the ID currently stored
// in the ID slot. A missing entry is treated as an unrecoverable error.
HostFunction HostFunctionStreamer::getHostFunction() {
    std::unique_lock lock(hostFunctionsMutex);

    auto extracted = hostFunctions.extract(getHostFunctionId());
    if (extracted) {
        return std::move(extracted.mapped());
    }

    UNRECOVERABLE_IF(true);
    return HostFunction{};
}
|
|
|
|
// Removes and returns the host function registered under hostFunctionId.
// A missing entry is treated as an unrecoverable error.
HostFunction HostFunctionStreamer::getHostFunction(uint64_t hostFunctionId) {
    std::unique_lock lock(hostFunctionsMutex);

    auto extracted = hostFunctions.extract(hostFunctionId);
    if (extracted) {
        return std::move(extracted.mapped());
    }

    UNRECOVERABLE_IF(true);
    return HostFunction{};
}
|
|
|
|
void HostFunctionStreamer::addHostFunction(uint64_t hostFunctionId, HostFunction &&hostFunction) {
|
|
{
|
|
std::unique_lock lock(hostFunctionsMutex);
|
|
hostFunctions.emplace(hostFunctionId, std::move(hostFunction));
|
|
}
|
|
pendingHostFunctions.fetch_add(1, std::memory_order_acq_rel);
|
|
}
|
|
|
|
// Returns the graphics allocation passed to the constructor.
GraphicsAllocation *HostFunctionStreamer::getHostFunctionIdAllocation() const {
    return this->allocation;
}
|
|
|
|
void HostFunctionStreamer::downloadHostFunctionAllocation() const {
|
|
if (isTbx) {
|
|
downloadAllocationImpl(*allocation);
|
|
}
|
|
}
|
|
|
|
uint64_t HostFunctionStreamer::isHostFunctionReadyToExecute() const {
|
|
if (pendingHostFunctions.load(std::memory_order_acquire) == 0) {
|
|
return false;
|
|
}
|
|
|
|
if (isBusy.load(std::memory_order_acquire)) {
|
|
return false;
|
|
}
|
|
|
|
downloadHostFunctionAllocation();
|
|
|
|
auto hostFunctionId = getHostFunctionId();
|
|
return hostFunctionId;
|
|
}
|
|
|
|
namespace HostFunctionFactory {
|
|
// Assigns a host function worker to csr, unless it already has one.
//   hostFunctionWorkerMode    - schedulerWithThreadPool selects the scheduler owned by
//                               rootDeviceEnvironment; every other value selects a new
//                               HostFunctionWorkerCountingSemaphore
//   skipHostFunctionExecution - forwarded to the constructor of the created worker/scheduler
//   csr                       - command stream receiver that receives the worker
//   rootDeviceEnvironment     - holder of the scheduler; the scheduler is created on first use
void createAndSetHostFunctionWorker(HostFunctionWorkerMode hostFunctionWorkerMode,
                                    bool skipHostFunctionExecution,
                                    CommandStreamReceiver *csr,
                                    RootDeviceEnvironment *rootDeviceEnvironment) {

    const bool workerAlreadySet = csr->getHostFunctionWorker() != nullptr;
    if (workerAlreadySet) {
        return;
    }

    if (hostFunctionWorkerMode == HostFunctionWorkerMode::schedulerWithThreadPool) {
        if (rootDeviceEnvironment->getHostFunctionScheduler() == nullptr) {
            // non-positive debug flag value means no limit on the thread pool size
            const auto requestedPoolSize = debugManager.flags.HostFunctionThreadPoolSize.get();
            int32_t nWorkers = (requestedPoolSize > 0)
                                   ? requestedPoolSize
                                   : HostFunctionThreadPoolHelper::unlimitedThreads;

            rootDeviceEnvironment->setHostFunctionScheduler(
                std::make_unique<HostFunctionScheduler>(skipHostFunctionExecution, nWorkers));
        }

        csr->setHostFunctionWorker(rootDeviceEnvironment->getHostFunctionScheduler());
        return;
    }

    // defaultMode, countingSemaphore and any unrecognized mode
    csr->setHostFunctionWorker(new HostFunctionWorkerCountingSemaphore(skipHostFunctionExecution));
}
|
|
|
|
} // namespace HostFunctionFactory
|
|
|
|
} // namespace NEO
|