mirror of
https://github.com/intel/compute-runtime.git
synced 2026-01-01 12:33:12 +08:00
Each host function gets its unique ID within a CSR. It uses 1 MI store to write the ID - to signal that the host function is ready - and 1 MI semaphore wait that waits for the ID to be cleared. Use the 0th bit of the ID as the pending/completed flag; the host function ID is incremented by 2 and starts with 1, so each ID will always have bit 0 set. This is a must-have since a semaphore wait can wait for 4 bytes only. Adjust command buffer programming and patching logic to IDs. Add hostFunction callable class - using the invoke method - which stores required information about the callback. Add host function streamer - stores all host function data for a given CSR. All user-provided host functions are stored in an unordered map, where the key is the host function ID. Add host function scheduler and a thread pool - under debug flag. The single-threaded scheduler loops over all registered host function streamers and dispatches ready-to-execute host functions to the thread pool. Allow for out-of-order host function execution for OOQ - under debug flag. Each host function has a bool isInOrder flag which indicates if it can be executed out of order - in this mode, the ID tag will be cleared immediately, so the semaphore wait will unblock before the host function execution. Remove Host Function worker CV and atomics based implementation. Rename classes. Related-To: NEO-14577 Signed-off-by: Kamil Kopryk <kamil.kopryk@intel.com>
291 lines
14 KiB
C++
291 lines
14 KiB
C++
/*
|
|
* Copyright (C) 2025 Intel Corporation
|
|
*
|
|
* SPDX-License-Identifier: MIT
|
|
*
|
|
*/
|
|
|
|
#include "shared/source/command_stream/host_function.h"
|
|
#include "shared/source/command_stream/tag_allocation_layout.h"
|
|
#include "shared/source/memory_manager/os_agnostic_memory_manager.h"
|
|
#include "shared/test/common/cmd_parse/hw_parse.h"
|
|
#include "shared/test/common/fixtures/device_fixture.h"
|
|
#include "shared/test/common/helpers/default_hw_info.h"
|
|
#include "shared/test/common/mocks/mock_command_stream_receiver.h"
|
|
#include "shared/test/common/mocks/mock_graphics_allocation.h"
|
|
#include "shared/test/common/test_macros/hw_test.h"
|
|
|
|
#include <cstddef>
|
|
|
|
using namespace NEO;
|
|
|
|
using HostFunctionTests = Test<DeviceFixture>;
|
|
|
|
// Programs a single host function through HostFunctionHelper::programHostFunction and checks that
// exactly one MI_STORE_DATA_IMM and one MI_SEMAPHORE_WAIT land in the stream, both targeting the
// host function ID location, and that the streamer hands back the same host function data.
HWTEST_F(HostFunctionTests, givenHostFunctionDataStoredWhenProgramHostFunctionIsCalledThenMiStoresAndSemaphoreWaitAreProgrammedCorrectlyInCorrectOrder) {
    using MI_STORE_DATA_IMM = typename FamilyType::MI_STORE_DATA_IMM;
    using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT;

    // Command stream backed by a zero-initialized stack buffer.
    constexpr auto bufferSize = 1024u;
    std::byte storage[bufferSize]{};
    LinearStream cmdStream(storage, bufferSize);

    // Host function description handed to the helper.
    uint64_t callbackAddress = 1024;
    uint64_t userDataAddress = 2048;
    bool isInOrder = true;

    HostFunction userHostFunction{
        .hostFunctionAddress = callbackAddress,
        .userDataAddress = userDataAddress,
        .isInOrder = isInOrder};

    // Streamer whose ID tag lives in a local variable, so the expected addresses are known.
    MockGraphicsAllocation idAllocation;
    uint64_t idTag = 1;
    std::function<void(GraphicsAllocation &)> noopDownload = [](GraphicsAllocation &) {};
    bool tbxMode = false;

    auto streamer = std::make_unique<HostFunctionStreamer>(&idAllocation,
                                                           &idTag,
                                                           noopDownload,
                                                           tbxMode);

    HostFunctionHelper<FamilyType>::programHostFunction(cmdStream, *streamer, std::move(userHostFunction));

    HardwareParse parser;
    parser.parseCommands<FamilyType>(cmdStream, 0);

    auto storeCmds = findAll<MI_STORE_DATA_IMM *>(parser.cmdList.begin(), parser.cmdList.end());
    EXPECT_EQ(1u, storeCmds.size());

    auto semaphoreCmds = findAll<MI_SEMAPHORE_WAIT *>(parser.cmdList.begin(), parser.cmdList.end());
    EXPECT_EQ(1u, semaphoreCmds.size());

    // MI store: writes the host function ID as a qword to the ID tag address.
    auto expectedId = 1u;
    auto storeCmd = genCmdCast<MI_STORE_DATA_IMM *>(*storeCmds[0]);
    EXPECT_EQ(reinterpret_cast<uint64_t>(&idTag), storeCmd->getAddress());
    EXPECT_EQ(getLowPart(expectedId), storeCmd->getDataDword0());
    EXPECT_EQ(getHighPart(expectedId), storeCmd->getDataDword1());
    EXPECT_TRUE(storeCmd->getStoreQword());

    // MI semaphore wait: polls the same address until it equals the "completed" status.
    auto semaphoreCmd = genCmdCast<MI_SEMAPHORE_WAIT *>(*semaphoreCmds[0]);
    EXPECT_EQ(reinterpret_cast<uint64_t>(&idTag), semaphoreCmd->getSemaphoreGraphicsAddress());
    EXPECT_EQ(static_cast<uint32_t>(HostFunctionStatus::completed), semaphoreCmd->getSemaphoreDataDword());
    EXPECT_EQ(MI_SEMAPHORE_WAIT::COMPARE_OPERATION_SAD_EQUAL_SDD, semaphoreCmd->getCompareOperation());
    EXPECT_EQ(MI_SEMAPHORE_WAIT::WAIT_MODE_POLLING_MODE, semaphoreCmd->getWaitMode());

    // The streamer must return the host function that was just programmed.
    auto storedHostFunction = streamer->getHostFunction();
    EXPECT_EQ(callbackAddress, storedHostFunction.hostFunctionAddress);
    EXPECT_EQ(userDataAddress, storedHostFunction.userDataAddress);
    EXPECT_EQ(isInOrder, storedHostFunction.isInOrder);
}
|
|
|
|
// Same expectations as the stream-based variant, but the MI_STORE_DATA_IMM and MI_SEMAPHORE_WAIT
// are written into command space reserved by the test itself via programHostFunctionId and
// programHostFunctionWaitForCompletion (with a null first argument).
HWTEST_F(HostFunctionTests, givenCommandBufferPassedWhenProgramHostFunctionsAreCalledThenMiStoresAndSemaphoreWaitAreProgrammedCorrectlyInCorrectOrder) {
    using MI_STORE_DATA_IMM = typename FamilyType::MI_STORE_DATA_IMM;
    using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT;

    // Streamer whose ID tag lives in a local variable, so the expected addresses are known.
    MockGraphicsAllocation idAllocation;
    uint64_t idTag = 1;
    std::function<void(GraphicsAllocation &)> noopDownload = [](GraphicsAllocation &) {};
    bool tbxMode = false;

    auto streamer = std::make_unique<HostFunctionStreamer>(&idAllocation,
                                                           &idTag,
                                                           noopDownload,
                                                           tbxMode);

    constexpr auto bufferSize = 1024u;
    std::byte storage[bufferSize]{};

    // Host function description handed to the helper.
    uint64_t callbackAddress = 1024;
    uint64_t userDataAddress = 2048;
    bool isInOrder = true;

    HostFunction userHostFunction{
        .hostFunctionAddress = callbackAddress,
        .userDataAddress = userDataAddress,
        .isInOrder = isInOrder};

    LinearStream cmdStream(storage, bufferSize);

    // Reserve space for each command first, then let the helper fill it in place.
    auto storeCmdSpace = cmdStream.getSpaceForCmd<MI_STORE_DATA_IMM>();
    HostFunctionHelper<FamilyType>::programHostFunctionId(nullptr, storeCmdSpace, *streamer, std::move(userHostFunction));

    auto semaphoreCmdSpace = cmdStream.getSpaceForCmd<MI_SEMAPHORE_WAIT>();
    HostFunctionHelper<FamilyType>::programHostFunctionWaitForCompletion(nullptr, semaphoreCmdSpace, *streamer);

    HardwareParse parser;
    parser.parseCommands<FamilyType>(cmdStream, 0);

    auto storeCmds = findAll<MI_STORE_DATA_IMM *>(parser.cmdList.begin(), parser.cmdList.end());
    EXPECT_EQ(1u, storeCmds.size());

    auto semaphoreCmds = findAll<MI_SEMAPHORE_WAIT *>(parser.cmdList.begin(), parser.cmdList.end());
    EXPECT_EQ(1u, semaphoreCmds.size());

    // MI store: writes the host function ID as a qword to the ID tag address.
    auto expectedId = 1u;
    auto storeCmd = genCmdCast<MI_STORE_DATA_IMM *>(*storeCmds[0]);
    EXPECT_EQ(reinterpret_cast<uint64_t>(&idTag), storeCmd->getAddress());
    EXPECT_EQ(getLowPart(expectedId), storeCmd->getDataDword0());
    EXPECT_EQ(getHighPart(expectedId), storeCmd->getDataDword1());
    EXPECT_TRUE(storeCmd->getStoreQword());

    // MI semaphore wait: polls the same address until it equals the "completed" status.
    auto semaphoreCmd = genCmdCast<MI_SEMAPHORE_WAIT *>(*semaphoreCmds[0]);
    EXPECT_EQ(reinterpret_cast<uint64_t>(&idTag), semaphoreCmd->getSemaphoreGraphicsAddress());
    EXPECT_EQ(static_cast<uint32_t>(HostFunctionStatus::completed), semaphoreCmd->getSemaphoreDataDword());
    EXPECT_EQ(MI_SEMAPHORE_WAIT::COMPARE_OPERATION_SAD_EQUAL_SDD, semaphoreCmd->getCompareOperation());
    EXPECT_EQ(MI_SEMAPHORE_WAIT::WAIT_MODE_POLLING_MODE, semaphoreCmd->getWaitMode());

    // The streamer must return the host function that was just programmed.
    auto storedHostFunction = streamer->getHostFunction();
    EXPECT_EQ(callbackAddress, storedHostFunction.hostFunctionAddress);
    EXPECT_EQ(userDataAddress, storedHostFunction.userDataAddress);
    EXPECT_EQ(isInOrder, storedHostFunction.isInOrder);
}
|
|
|
|
// Walks a HostFunctionStreamer through two programmed host functions (one in-order, one
// out-of-order), for both values of the TBX flag, checking readiness, the download callback,
// completion signalling and the data returned for each host function.
HWTEST_F(HostFunctionTests, givenHostFunctionStreamerWhenProgramHostFunctionIsCalledThenHostFunctionStreamerWasUpdatedWithHostFunction) {

    uint64_t firstCallback = 1024;
    uint64_t firstUserData = 2048;
    uint64_t secondCallback = 4096;
    uint64_t secondUserData = 8192;

    // One command stream is shared by both loop iterations.
    constexpr auto bufferSize = 4096u;
    std::byte storage[bufferSize]{};
    LinearStream cmdStream(storage, bufferSize);

    for (bool tbxMode : ::testing::Bool()) {

        HostFunction inOrderFunction{
            .hostFunctionAddress = firstCallback,
            .userDataAddress = firstUserData,
            .isInOrder = true};

        HostFunction outOfOrderFunction{
            .hostFunctionAddress = secondCallback,
            .userDataAddress = secondUserData,
            .isInOrder = false};

        // The ID tag is a local, so the test can overwrite it directly between streamer calls.
        uint64_t idTag = HostFunctionStatus::completed;
        uint64_t idTagAddress = reinterpret_cast<uint64_t>(&idTag);
        MockGraphicsAllocation idAllocation;
        bool downloadSeen = false;
        std::function<void(GraphicsAllocation &)> recordDownload = [&downloadSeen](GraphicsAllocation &) { downloadSeen = true; };

        auto streamer = std::make_unique<HostFunctionStreamer>(&idAllocation,
                                                               &idTag,
                                                               recordDownload,
                                                               tbxMode);

        // Nothing programmed yet.
        EXPECT_FALSE(streamer->isHostFunctionReadyToExecute());

        {
            // First host function: in order.
            HostFunctionHelper<FamilyType>::programHostFunction(cmdStream, *streamer, std::move(inOrderFunction));
            idTag = 1u; // simulate function being processed

            auto firstProgrammed = streamer->getHostFunction();

            EXPECT_EQ(&idAllocation, streamer->getHostFunctionIdAllocation());
            EXPECT_EQ(idTagAddress, streamer->getHostFunctionIdGpuAddress());

            // A tag holding the "completed" status is not ready; a tag holding the ID is.
            idTag = HostFunctionStatus::completed;
            EXPECT_FALSE(streamer->isHostFunctionReadyToExecute());
            idTag = 1u;
            EXPECT_TRUE(streamer->isHostFunctionReadyToExecute());
            EXPECT_EQ(tbxMode, downloadSeen);

            streamer->prepareForExecution(firstProgrammed);

            // The next host function has to wait: the streamer stays busy until completion is signalled.
            EXPECT_FALSE(streamer->isHostFunctionReadyToExecute());
            streamer->signalHostFunctionCompletion(firstProgrammed);
            EXPECT_EQ(HostFunctionStatus::completed, idTag); // ID tag must now read as completed

            EXPECT_EQ(firstCallback, firstProgrammed.hostFunctionAddress);
            EXPECT_EQ(firstUserData, firstProgrammed.userDataAddress);
            EXPECT_TRUE(firstProgrammed.isInOrder);
        }
        {
            idTag = HostFunctionStatus::completed;

            // Second host function: out of order.
            HostFunctionHelper<FamilyType>::programHostFunction(cmdStream, *streamer, std::move(outOfOrderFunction));

            idTag = 3u; // simulate function being processed

            auto secondProgrammed = streamer->getHostFunction();

            EXPECT_EQ(&idAllocation, streamer->getHostFunctionIdAllocation());
            EXPECT_EQ(idTagAddress, streamer->getHostFunctionIdGpuAddress());

            idTag = HostFunctionStatus::completed;
            EXPECT_FALSE(streamer->isHostFunctionReadyToExecute());

            // Writing the streamer's next ID into the tag makes it report ready again.
            idTag = streamer->getNextHostFunctionIdAndIncrement();
            EXPECT_TRUE(streamer->isHostFunctionReadyToExecute());
            EXPECT_EQ(tbxMode, downloadSeen);

            streamer->prepareForExecution(secondProgrammed);
            streamer->signalHostFunctionCompletion(secondProgrammed);
            EXPECT_EQ(HostFunctionStatus::completed, idTag); // ID tag must now read as completed

            EXPECT_EQ(secondCallback, secondProgrammed.hostFunctionAddress);
            EXPECT_EQ(secondUserData, secondProgrammed.userDataAddress);
            EXPECT_FALSE(secondProgrammed.isInOrder);
        }
        {
            // No programmed host functions are left.
            EXPECT_FALSE(streamer->isHostFunctionReadyToExecute());
        }
    }
}
|
|
|
|
// Calls ensureHostFunctionWorkerStarted twice and startHostFunctionWorker once on a mock CSR and
// checks that the streamer instance stays the same, that the start counter only moves when
// expected, and that the ID tag sits at hostFunctionDataOffset inside a tagBuffer allocation.
TEST(CommandStreamReceiverHostFunctionsTest, givenCommandStreamReceiverWhenEnsureHostFunctionDataInitializationCalledThenHostFunctionAllocationIsBeingAllocatedOnlyOnce) {
    MockExecutionEnvironment mockEnvironment(defaultHwInfo.get());
    DeviceBitfield deviceMask(0b11);
    auto mockCsr = std::make_unique<MockCommandStreamReceiver>(mockEnvironment, 0, deviceMask);
    mockEnvironment.memoryManager = std::make_unique<OsAgnosticMemoryManager>(mockEnvironment);

    mockCsr->initializeTagAllocation();

    // First call starts the worker once.
    mockCsr->ensureHostFunctionWorkerStarted();
    auto *initialStreamer = &mockCsr->getHostFunctionStreamer();
    EXPECT_NE(nullptr, initialStreamer);
    EXPECT_EQ(1u, mockCsr->startHostFunctionWorkerCalledTimes);

    // Second call leaves both the streamer and the start counter untouched.
    mockCsr->ensureHostFunctionWorkerStarted();
    EXPECT_EQ(initialStreamer, &mockCsr->getHostFunctionStreamer());
    EXPECT_EQ(1u, mockCsr->startHostFunctionWorkerCalledTimes);

    // A direct start call bumps the counter, yet the streamer is still the same object.
    mockCsr->startHostFunctionWorker();
    EXPECT_EQ(2u, mockCsr->startHostFunctionWorkerCalledTimes);
    EXPECT_EQ(initialStreamer, &mockCsr->getHostFunctionStreamer());

    auto *idAllocation = initialStreamer->getHostFunctionIdAllocation();
    EXPECT_EQ(AllocationType::tagBuffer, idAllocation->getAllocationType());

    // Expected tag address: allocation CPU base plus the layout offset for host function data.
    auto *expectedTagPtr = ptrOffset(idAllocation->getUnderlyingBuffer(), TagAllocationLayout::hostFunctionDataOffset);
    auto expectedTagAddress = reinterpret_cast<uint64_t>(expectedTagPtr);

    EXPECT_EQ(expectedTagAddress, initialStreamer->getHostFunctionIdGpuAddress());
}
|
|
|
|
// Starts the host function worker on a mock CSR, then checks that the streamer exposes an ID
// allocation and that the worker-creation counter reads one. The CSR is owned by a unique_ptr
// and is therefore destroyed when the test body ends.
TEST(CommandStreamReceiverHostFunctionsTest, givenDestructedCommandStreamReceiverWhenEnsureHostFunctionDataInitializationCalledThenHostFunctionAllocationsDeallocated) {
    MockExecutionEnvironment mockEnvironment(defaultHwInfo.get());
    DeviceBitfield deviceMask(0b11);

    auto mockCsr = std::make_unique<MockCommandStreamReceiver>(mockEnvironment, 0, deviceMask);
    mockEnvironment.memoryManager = std::make_unique<OsAgnosticMemoryManager>(mockEnvironment);
    mockCsr->initializeTagAllocation();

    mockCsr->ensureHostFunctionWorkerStarted();

    auto *idAllocation = mockCsr->getHostFunctionStreamer().getHostFunctionIdAllocation();
    EXPECT_NE(nullptr, idAllocation);
    EXPECT_EQ(1u, mockCsr->createHostFunctionWorkerCounter);
}
|