feature: add host function allocation

Related-To: NEO-14577
Signed-off-by: Kamil Kopryk <kamil.kopryk@intel.com>
This commit is contained in:
Kamil Kopryk
2025-09-22 08:20:23 +00:00
committed by Compute-Runtime-Automation
parent 27922536ff
commit 75b4de70cd
20 changed files with 175 additions and 15 deletions

View File

@@ -40,6 +40,7 @@ bool AubHelper::isOneTimeAubWritableAllocationType(const AllocationType &type) {
case AllocationType::assertBuffer:
case AllocationType::tagBuffer:
case AllocationType::syncDispatchToken:
case AllocationType::hostFunction:
return true;
case AllocationType::bufferHostMemory:
return NEO::debugManager.isTbxPageFaultManagerEnabled() ||

View File

@@ -418,6 +418,16 @@ void CommandStreamReceiver::cleanupResources() {
tagsMultiAllocation = nullptr;
}
if (hostFunctionDataMultiAllocation) {
hostFunctionDataAllocation = nullptr;
for (auto graphicsAllocation : hostFunctionDataMultiAllocation->getGraphicsAllocations()) {
getMemoryManager()->freeGraphicsMemory(graphicsAllocation);
}
delete hostFunctionDataMultiAllocation;
hostFunctionDataMultiAllocation = nullptr;
}
if (globalFenceAllocation) {
getMemoryManager()->freeGraphicsMemory(globalFenceAllocation);
globalFenceAllocation = nullptr;
@@ -538,7 +548,7 @@ void CommandStreamReceiver::setTagAllocation(GraphicsAllocation *allocation) {
reinterpret_cast<uint8_t *>(allocation->getUnderlyingBuffer()) + TagAllocationLayout::debugPauseStateAddressOffset);
}
MultiGraphicsAllocation &CommandStreamReceiver::createTagsMultiAllocation() {
MultiGraphicsAllocation &CommandStreamReceiver::createMultiAllocationInSystemMemoryPool(AllocationType allocationType) {
RootDeviceIndicesContainer rootDeviceIndices;
rootDeviceIndices.pushUnique(rootDeviceIndex);
@@ -546,7 +556,7 @@ MultiGraphicsAllocation &CommandStreamReceiver::createTagsMultiAllocation() {
auto maxRootDeviceIndex = static_cast<uint32_t>(this->executionEnvironment.rootDeviceEnvironments.size() - 1);
auto allocations = new MultiGraphicsAllocation(maxRootDeviceIndex);
AllocationProperties unifiedMemoryProperties{rootDeviceIndex, MemoryConstants::pageSize, AllocationType::tagBuffer, systemMemoryBitfield};
AllocationProperties unifiedMemoryProperties{rootDeviceIndex, MemoryConstants::pageSize, allocationType, systemMemoryBitfield};
this->getMemoryManager()->createMultiGraphicsAllocationInSystemMemoryPool(rootDeviceIndices, unifiedMemoryProperties, *allocations);
return *allocations;
@@ -695,6 +705,37 @@ void *CommandStreamReceiver::getIndirectHeapCurrentPtr(IndirectHeapType heapType
return nullptr;
}
// Makes sure the host function data has been set up before it is used.
// Returns immediately when the initialized flag is already set; otherwise
// defers to initializeHostFunctionData(), which re-checks the flag after
// obtaining unique ownership.
// NOTE(review): hostFunctionInitialized is a plain bool and is read here
// without holding the ownership lock - confirm callers cannot race on it.
void CommandStreamReceiver::ensureHostFunctionDataInitialization() {
    if (hostFunctionInitialized) {
        return;
    }
    initializeHostFunctionData();
}
void CommandStreamReceiver::initializeHostFunctionData() {
auto lock = obtainUniqueOwnership();
if (hostFunctionInitialized) {
return;
}
this->hostFunctionDataMultiAllocation = &this->createMultiAllocationInSystemMemoryPool(AllocationType::hostFunction);
this->hostFunctionDataAllocation = hostFunctionDataMultiAllocation->getGraphicsAllocation(rootDeviceIndex);
void *hostFunctionBuffer = hostFunctionDataAllocation->getUnderlyingBuffer();
this->hostFunctionData.entry = reinterpret_cast<decltype(HostFunctionData::entry)>(ptrOffset(hostFunctionBuffer, HostFunctionHelper::entryOffset));
this->hostFunctionData.userData = reinterpret_cast<decltype(HostFunctionData::userData)>(ptrOffset(hostFunctionBuffer, HostFunctionHelper::userDataOffset));
this->hostFunctionData.internalTag = reinterpret_cast<decltype(HostFunctionData::internalTag)>(ptrOffset(hostFunctionBuffer, HostFunctionHelper::internalTagOffset));
this->hostFunctionInitialized = true;
}
// Returns the receiver's HostFunctionData member by reference.
// Its entry / userData / internalTag pointers are assigned by
// initializeHostFunctionData().
HostFunctionData &CommandStreamReceiver::getHostFunctionData() {
    return this->hostFunctionData;
}
// Returns the cached host function allocation for this receiver.
// The member starts as nullptr, is assigned by initializeHostFunctionData()
// and is reset to nullptr by cleanupResources().
GraphicsAllocation *CommandStreamReceiver::getHostFunctionDataAllocation() {
    return this->hostFunctionDataAllocation;
}
IndirectHeap &CommandStreamReceiver::getIndirectHeap(IndirectHeap::Type heapType,
size_t minRequiredSize) {
DEBUG_BREAK_IF(static_cast<uint32_t>(heapType) >= arrayCount(indirectHeap));
@@ -824,8 +865,12 @@ void *CommandStreamReceiver::asyncDebugBreakConfirmation(void *arg) {
return nullptr;
}
void CommandStreamReceiver::makeResidentHostFunctionAllocation() {
makeResident(*hostFunctionDataAllocation);
}
bool CommandStreamReceiver::initializeTagAllocation() {
this->tagsMultiAllocation = &this->createTagsMultiAllocation();
this->tagsMultiAllocation = &this->createMultiAllocationInSystemMemoryPool(AllocationType::tagBuffer);
auto tagAllocation = tagsMultiAllocation->getGraphicsAllocation(rootDeviceIndex);
if (!tagAllocation) {

View File

@@ -6,6 +6,7 @@
*/
#pragma once
#include "shared/source/command_stream/host_function.h"
#include "shared/source/command_stream/linear_stream.h"
#include "shared/source/command_stream/memory_compression_state.h"
#include "shared/source/command_stream/preemption_mode.h"
@@ -160,8 +161,8 @@ class CommandStreamReceiver : NEO::NonCopyableAndNonMovableClass {
// Returns the stored tags multi allocation pointer (nullptr when unset).
MultiGraphicsAllocation *getTagsMultiAllocation() const { return this->tagsMultiAllocation; }
MultiGraphicsAllocation &createTagsMultiAllocation();
MultiGraphicsAllocation &createMultiAllocationInSystemMemoryPool(AllocationType allocationType);
void makeResidentHostFunctionAllocation();
// Atomically adds one to barrierCount and returns the value it held before the increment.
TaskCountType getNextBarrierCount() {
    return barrierCount.fetch_add(1u);
}
// Reads the current barrierCount value without modifying it.
TaskCountType peekBarrierCount() const {
    return barrierCount.load();
}
// Returns the stored tag address pointer.
volatile TagAddressType *getTagAddress() const {
    return this->tagAddress;
}
@@ -564,7 +565,13 @@ class CommandStreamReceiver : NEO::NonCopyableAndNonMovableClass {
MOCKABLE_VIRTUAL uint32_t getContextGroupId() const;
void ensureHostFunctionDataInitialization();
HostFunctionData &getHostFunctionData();
GraphicsAllocation *getHostFunctionDataAllocation();
protected:
MOCKABLE_VIRTUAL void initializeHostFunctionData();
virtual CompletionStamp flushTaskHeapless(LinearStream &commandStreamTask, size_t commandStreamTaskStart,
const IndirectHeap *dsh, const IndirectHeap *ioh, const IndirectHeap *ssh,
TaskCountType taskLevel, DispatchFlags &dispatchFlags, Device &device) = 0;
@@ -637,11 +644,13 @@ class CommandStreamReceiver : NEO::NonCopyableAndNonMovableClass {
GraphicsAllocation *globalStatelessHeapAllocation = nullptr;
MultiGraphicsAllocation *tagsMultiAllocation = nullptr;
MultiGraphicsAllocation *hostFunctionDataMultiAllocation = nullptr;
GraphicsAllocation *hostFunctionDataAllocation = nullptr;
IndirectHeap *indirectHeap[IndirectHeapType::numTypes];
OsContext *osContext = nullptr;
CommandStreamReceiver *primaryCsr = nullptr;
TaskCountType *completionFenceValuePointer = nullptr;
HostFunctionData hostFunctionData;
std::atomic<TaskCountType> barrierCount{0};
// current taskLevel. Used for determining if a PIPE_CONTROL is needed.
@@ -672,7 +681,6 @@ class CommandStreamReceiver : NEO::NonCopyableAndNonMovableClass {
uint32_t immWritePostSyncWriteOffset = 0;
uint32_t timeStampPostSyncWriteOffset = 0;
TaskCountType completionFenceValue = 0;
const uint32_t rootDeviceIndex;
const DeviceBitfield deviceBitfield;
@@ -697,7 +705,7 @@ class CommandStreamReceiver : NEO::NonCopyableAndNonMovableClass {
bool requiresInstructionCacheFlush = false;
bool requiresDcFlush = false;
bool pushAllocationsForMakeResident = true;
bool hostFunctionInitialized = false;
bool localMemoryEnabled = false;
bool pageTableManagerInitialized = false;

View File

@@ -60,7 +60,8 @@ GMM_RESOURCE_USAGE_TYPE_ENUM CacheSettingsHelper::getDefaultUsageTypeWithCaching
return GMM_RESOURCE_USAGE_OCL_BUFFER_CSR_UC;
} else if (AllocationType::semaphoreBuffer == allocationType) {
return GMM_RESOURCE_USAGE_OCL_SYSTEM_MEMORY_BUFFER;
} else if (AllocationType::tagBuffer == allocationType) {
} else if (AllocationType::tagBuffer == allocationType ||
AllocationType::hostFunction == allocationType) {
return GMM_RESOURCE_USAGE_OCL_BUFFER;
}
}
@@ -96,6 +97,7 @@ GMM_RESOURCE_USAGE_TYPE_ENUM CacheSettingsHelper::getDefaultUsageTypeWithCaching
case AllocationType::svmZeroCopy:
case AllocationType::tagBuffer:
case AllocationType::printfSurface:
case AllocationType::hostFunction:
if (debugManager.flags.DisableCachingForStatefulBufferAccess.get()) {
return getDefaultUsageTypeWithCachingDisabled(allocationType, productHelper);
}

View File

@@ -118,6 +118,8 @@ const char *AppResourceHelper::getResourceTagStr(AllocationType type) {
return "ASSRTBUF";
case AllocationType::syncDispatchToken:
return "SYNCTOK";
case AllocationType::hostFunction:
return "HOSTFUNC";
default:
return "NOTFOUND";
}

View File

@@ -59,6 +59,7 @@ enum class AllocationType {
deferredTasksList,
assertBuffer,
syncDispatchToken,
hostFunction,
count
};

View File

@@ -600,6 +600,7 @@ bool MemoryManager::getAllocationData(AllocationData &allocationData, const Allo
case AllocationType::debugContextSaveArea:
case AllocationType::debugSbaTrackingBuffer:
case AllocationType::swTagBuffer:
case AllocationType::hostFunction:
allocationData.flags.useSystemMemory = true;
default:
break;

View File

@@ -221,6 +221,8 @@ const char *getAllocationTypeString(GraphicsAllocation const *graphicsAllocation
return "ASSERT_BUFFER";
case AllocationType::syncDispatchToken:
return "SYNC_DISPATCH_TOKEN";
case AllocationType::hostFunction:
return "HOST_FUNCTION";
default:
return "ILLEGAL_VALUE";
}

View File

@@ -56,6 +56,7 @@ std::optional<GfxMemoryAllocationMethod> ProductHelperHw<gfxProduct>::getPreferr
switch (allocationType) {
case AllocationType::tagBuffer:
case AllocationType::timestampPacketTagBuffer:
case AllocationType::hostFunction:
return {};
default:
return GfxMemoryAllocationMethod::allocateByKmd;

View File

@@ -43,6 +43,8 @@ class MockCommandStreamReceiver : public CommandStreamReceiver {
using CommandStreamReceiver::gpuHangCheckPeriod;
using CommandStreamReceiver::heaplessStateInitEnabled;
using CommandStreamReceiver::heaplessStateInitialized;
using CommandStreamReceiver::hostFunctionDataAllocation;
using CommandStreamReceiver::hostFunctionDataMultiAllocation;
using CommandStreamReceiver::immWritePostSyncWriteOffset;
using CommandStreamReceiver::internalAllocationStorage;
using CommandStreamReceiver::latestFlushedTaskCount;
@@ -277,6 +279,11 @@ class MockCommandStreamReceiver : public CommandStreamReceiver {
BaseClass::setupContext(osContext);
}
// Counts every invocation (including ones that return early inside the base
// implementation) and then forwards to the real implementation.
void initializeHostFunctionData() override {
    ++initializeHostFunctionDataCalledTimes;
    BaseClass::initializeHostFunctionData();
}
static constexpr size_t tagSize = 256;
static volatile TagAddressType mockTagAddress[tagSize];
std::vector<char> instructionHeapReserveredData;
@@ -289,6 +296,7 @@ class MockCommandStreamReceiver : public CommandStreamReceiver {
uint32_t downloadAllocationsCalledCount = 0;
uint32_t submitDependencyUpdateCalledTimes = 0;
uint32_t stopDirectSubmissionCalledTimes = 0;
uint32_t initializeHostFunctionDataCalledTimes = 0;
int hostPtrSurfaceCreationMutexLockCount = 0;
bool multiOsContextCapable = false;
bool memoryCompressionEnabled = false;

View File

@@ -119,6 +119,7 @@ TEST(AubHelper, givenAllocationTypeWhenAskingIfOneTimeWritableThenReturnCorrectR
case AllocationType::assertBuffer:
case AllocationType::tagBuffer:
case AllocationType::syncDispatchToken:
case AllocationType::hostFunction:
EXPECT_TRUE(isOneTimeWritable);
break;
default:

View File

@@ -6446,3 +6446,75 @@ HWTEST_F(CommandStreamReceiverHwTest, givenVariousCsrModeWhenGettingHardwareMode
ultCsr.commandStreamReceiverType = CommandStreamReceiverType::tbxWithAub;
EXPECT_FALSE(ultCsr.isHardwareMode());
}
// Verifies that the host function allocation is created lazily, exactly once,
// and that the cached HostFunctionData pointers land at the expected offsets
// inside the allocation's buffer.
TEST(CommandStreamReceiverHostFunctionsTest, givenCommandStreamReceiverWhenEnsureHostFunctionDataInitializationCalledThenHostFunctionAllocationIsBeingAllocatedOnlyOnce) {
    MockExecutionEnvironment executionEnvironment(defaultHwInfo.get());
    DeviceBitfield devices(0b11);
    auto csr = std::make_unique<MockCommandStreamReceiver>(executionEnvironment, 0, devices);
    executionEnvironment.memoryManager.reset(new OsAgnosticMemoryManager(executionEnvironment));

    // Nothing is allocated until initialization is requested.
    EXPECT_EQ(nullptr, csr->getHostFunctionDataAllocation());

    // First request performs the initialization.
    csr->ensureHostFunctionDataInitialization();
    auto *allocation = csr->getHostFunctionDataAllocation();
    EXPECT_NE(nullptr, allocation);
    EXPECT_EQ(1u, csr->initializeHostFunctionDataCalledTimes);

    // Second request is a no-op: same allocation, initializer not re-entered.
    csr->ensureHostFunctionDataInitialization();
    EXPECT_EQ(allocation, csr->getHostFunctionDataAllocation());
    EXPECT_EQ(1u, csr->initializeHostFunctionDataCalledTimes);

    // A direct call bumps the mock counter, but the early return inside the
    // initializer keeps the existing allocation untouched.
    csr->initializeHostFunctionData();
    EXPECT_EQ(2u, csr->initializeHostFunctionDataCalledTimes);
    EXPECT_EQ(allocation, csr->getHostFunctionDataAllocation());

    EXPECT_EQ(AllocationType::hostFunction, allocation->getAllocationType());

    // Address expected for a field placed at `offset` within the allocation buffer.
    auto addressAt = [&](auto offset) {
        return reinterpret_cast<uint64_t>(ptrOffset(allocation->getUnderlyingBuffer(), offset));
    };

    auto expectedHostFunctionAddress = addressAt(HostFunctionHelper::entryOffset);
    EXPECT_EQ(expectedHostFunctionAddress, reinterpret_cast<uint64_t>(csr->getHostFunctionData().entry));

    auto expectedUserDataAddress = addressAt(HostFunctionHelper::userDataOffset);
    EXPECT_EQ(expectedUserDataAddress, reinterpret_cast<uint64_t>(csr->getHostFunctionData().userData));

    auto expectedInternalTagAddress = addressAt(HostFunctionHelper::internalTagOffset);
    EXPECT_EQ(expectedInternalTagAddress, reinterpret_cast<uint64_t>(csr->getHostFunctionData().internalTag));
}
// Verifies that cleanupResources() resets both host function allocation
// pointers after they were populated by initialization.
TEST(CommandStreamReceiverHostFunctionsTest, givenDestructedCommandStreamReceiverWhenEnsureHostFunctionDataInitializationCalledThenHostFunctionAllocationsDeallocated) {
    MockExecutionEnvironment executionEnvironment(defaultHwInfo.get());
    DeviceBitfield devices(0b11);
    auto csr = std::make_unique<MockCommandStreamReceiver>(executionEnvironment, 0, devices);
    executionEnvironment.memoryManager.reset(new OsAgnosticMemoryManager(executionEnvironment));

    // Precondition: no allocation before initialization.
    EXPECT_EQ(nullptr, csr->getHostFunctionDataAllocation());

    csr->ensureHostFunctionDataInitialization();

    // Both the single allocation and its owning container are populated.
    EXPECT_NE(nullptr, csr->hostFunctionDataAllocation);
    EXPECT_NE(nullptr, csr->hostFunctionDataMultiAllocation);

    csr->cleanupResources();

    // Cleanup must leave no dangling pointers behind.
    EXPECT_EQ(nullptr, csr->hostFunctionDataAllocation);
    EXPECT_EQ(nullptr, csr->hostFunctionDataMultiAllocation);
}
// Verifies the residency round trip of the host function allocation:
// not resident -> makeResidentHostFunctionAllocation() -> resident ->
// makeNonResident() -> not resident, with one entry in the eviction list.
TEST(CommandStreamReceiverHostFunctionsTest, givenCommandStreamReceiverWithHostFunctionDataWhenMakeResidentHostFunctionAllocationIsCalledThenHostAllocationIsResident) {
    std::unique_ptr<MockDevice> device(MockDevice::createWithNewExecutionEnvironment<MockDevice>(defaultHwInfo.get(), 0u));
    auto &csr = *device->commandStreamReceivers[0];

    ASSERT_EQ(nullptr, csr.getHostFunctionDataAllocation());

    csr.ensureHostFunctionDataInitialization();
    auto *allocation = csr.getHostFunctionDataAllocation();
    ASSERT_NE(nullptr, allocation);

    auto contextId = csr.getOsContext().getContextId();
    EXPECT_FALSE(allocation->isResident(contextId));

    csr.makeResidentHostFunctionAllocation();
    EXPECT_TRUE(allocation->isResident(contextId));

    csr.makeNonResident(*allocation);
    EXPECT_FALSE(allocation->isResident(contextId));

    EXPECT_EQ(1u, csr.getEvictionAllocations().size());
}

View File

@@ -1555,6 +1555,7 @@ static constexpr std::array onceWritableAllocTypesForTbx{
AllocationType::tagBuffer,
AllocationType::syncDispatchToken,
AllocationType::bufferHostMemory,
AllocationType::hostFunction,
};
HWTEST_F(TbxCommandStreamTests, givenAubOneTimeWritableAllocWhenTbxFaultManagerIsAvailableAndAllocIsTbxFaultableThenTbxFaultableTypesShouldReturnTrue) {

View File

@@ -722,6 +722,7 @@ TEST(GmmTest, givenAllocationTypeWhenGettingUsageTypeThenReturnCorrectValue) {
case AllocationType::svmZeroCopy:
case AllocationType::tagBuffer:
case AllocationType::printfSurface:
case AllocationType::hostFunction:
expectedUsage = forceUncached ? uncachedGmmUsageType : GMM_RESOURCE_USAGE_OCL_SYSTEM_MEMORY_BUFFER;
break;
default:
@@ -831,6 +832,7 @@ TEST(GmmTest, givenAllocationTypeAndMitigatedDcFlushWhenGettingUsageTypeThenRetu
case AllocationType::svmZeroCopy:
case AllocationType::tagBuffer:
case AllocationType::printfSurface:
case AllocationType::hostFunction:
expectedUsage = GMM_RESOURCE_USAGE_OCL_SYSTEM_MEMORY_BUFFER;
break;
default:

View File

@@ -102,7 +102,8 @@ AllocationTypeTagTestCase allocationTypeTagValues[static_cast<int>(AllocationTyp
{AllocationType::swTagBuffer, "SWTAGBF"},
{AllocationType::deferredTasksList, "TSKLIST"},
{AllocationType::assertBuffer, "ASSRTBUF"},
{AllocationType::syncDispatchToken, "SYNCTOK"}};
{AllocationType::syncDispatchToken, "SYNCTOK"},
{AllocationType::hostFunction, "HOSTFUNC"}};
class AllocationTypeTagString : public ::testing::TestWithParam<AllocationTypeTagTestCase> {};
TEST_P(AllocationTypeTagString, givenGraphicsAllocationTypeWhenCopyTagToStorageInfoThenCorrectTagIsReturned) {

View File

@@ -532,6 +532,14 @@ TEST(MemoryManagerTest, givenTagBufferTypeWhenGetAllocationDataIsCalledThenSyste
EXPECT_TRUE(allocData.flags.useSystemMemory);
}
// Verifies that requesting AllocationType::hostFunction makes
// getAllocationData() set the useSystemMemory flag.
TEST(MemoryManagerTest, givenHostFunctionTypeWhenGetAllocationDataIsCalledThenSystemMemoryIsRequested) {
    AllocationData allocationData;
    MockMemoryManager memoryManager;
    AllocationProperties hostFunctionProperties{mockRootDeviceIndex, 1, AllocationType::hostFunction, mockDeviceBitfield};

    memoryManager.getAllocationData(allocationData,
                                    hostFunctionProperties,
                                    nullptr,
                                    memoryManager.createStorageInfoFromProperties(hostFunctionProperties));

    EXPECT_TRUE(allocationData.flags.useSystemMemory);
}
TEST(MemoryManagerTest, givenGlobalFenceTypeWhenGetAllocationDataIsCalledThenSystemMemoryIsRequested) {
AllocationData allocData;
MockMemoryManager mockMemoryManager;

View File

@@ -601,7 +601,8 @@ TEST_F(WddmMemoryManagerTests, givenTypeWhenCallIsStatelessAccessRequiredThenPro
AllocationType::swTagBuffer,
AllocationType::deferredTasksList,
AllocationType::assertBuffer,
AllocationType::syncDispatchToken}) {
AllocationType::syncDispatchToken,
AllocationType::hostFunction}) {
EXPECT_FALSE(wddmMemoryManager->isStatelessAccessRequired(type));
}
}

View File

@@ -447,7 +447,7 @@ TEST(AllocationTypeLogging, givenGraphicsAllocationTypeWhenConvertingToStringThe
DebugVariables flags;
FullyEnabledFileLogger fileLogger(testFile, flags);
std::array<std::pair<NEO::AllocationType, const char *>, 40> allocationTypeValues = {
std::array<std::pair<NEO::AllocationType, const char *>, 41> allocationTypeValues = {
{{AllocationType::buffer, "BUFFER"},
{AllocationType::bufferHostMemory, "BUFFER_HOST_MEMORY"},
{AllocationType::commandBuffer, "COMMAND_BUFFER"},
@@ -487,7 +487,8 @@ TEST(AllocationTypeLogging, givenGraphicsAllocationTypeWhenConvertingToStringThe
{AllocationType::debugContextSaveArea, "DEBUG_CONTEXT_SAVE_AREA"},
{AllocationType::debugSbaTrackingBuffer, "DEBUG_SBA_TRACKING_BUFFER"},
{AllocationType::debugModuleArea, "DEBUG_MODULE_AREA"},
{AllocationType::swTagBuffer, "SW_TAG_BUFFER"}}};
{AllocationType::swTagBuffer, "SW_TAG_BUFFER"},
{AllocationType::hostFunction, "HOST_FUNCTION"}}};
for (const auto &[type, str] : allocationTypeValues) {
GraphicsAllocation graphicsAllocation(0, 1u /*num gmms*/, type, nullptr, 0, 0, MemoryPool::memoryNull, MemoryManager::maxOsContextCount, 0llu);

View File

@@ -102,7 +102,8 @@ LNLTEST_F(LnlProductHelper, whenCheckPreferredAllocationMethodThenAllocateByKmdI
auto allocationType = static_cast<AllocationType>(i);
auto preferredAllocationMethod = productHelper->getPreferredAllocationMethod(allocationType);
if (allocationType == AllocationType::tagBuffer ||
allocationType == AllocationType::timestampPacketTagBuffer) {
allocationType == AllocationType::timestampPacketTagBuffer ||
allocationType == AllocationType::hostFunction) {
EXPECT_TRUE(preferredAllocationMethod.has_value());
EXPECT_EQ(GfxMemoryAllocationMethod::allocateByKmd, preferredAllocationMethod.value());
}

View File

@@ -336,7 +336,8 @@ HWTEST2_F(XeLpgProductHelperTests, whenCheckPreferredAllocationMethodThenAllocat
auto allocationType = static_cast<AllocationType>(i);
auto preferredAllocationMethod = productHelper->getPreferredAllocationMethod(allocationType);
if (allocationType == AllocationType::tagBuffer ||
allocationType == AllocationType::timestampPacketTagBuffer) {
allocationType == AllocationType::timestampPacketTagBuffer ||
allocationType == AllocationType::hostFunction) {
EXPECT_FALSE(preferredAllocationMethod.has_value());
} else {
EXPECT_TRUE(preferredAllocationMethod.has_value());