Mirror of https://github.com/intel/compute-runtime.git

feature: implement pool allocator for gpuTimestampDeviceBuffer

The patch applies to Level Zero. Only allocations smaller than 2 MB are served from the pool. Allocations are shared and reused within a given device. Additionally, a new debug flag, EnableTimestampPoolAllocator, controls the allocator.

Related-To: NEO-12287
Signed-off-by: Fabian Zwoliński <fabian.zwolinski@intel.com>

Committed by Compute-Runtime-Automation
parent 8836f6df0b
commit 7ef3880793
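
Illustration (not part of the diff below): a minimal sketch of how the pooled timestamp path is meant to be used, assuming a valid NEO::Device pointer and using only the interfaces introduced by this patch.

// Sketch only; "device" and "eventPoolSize" are placeholders.
NEO::SharedTimestampAllocation *acquireTimestampChunk(NEO::Device *device, size_t eventPoolSize) {
    auto &allocator = device->getDeviceTimestampPoolAllocator();
    if (!allocator.isEnabled()) {
        return nullptr; // caller falls back to a regular gpuTimestampDeviceBuffer allocation
    }
    // Returns nullptr for requests above the 2 MB limit or if no pool space can be obtained.
    return allocator.requestGraphicsAllocationForTimestamp(eventPoolSize);
}

void releaseTimestampChunk(NEO::Device *device, NEO::SharedTimestampAllocation *chunk) {
    if (chunk != nullptr) {
        // The chunk is returned to the pool; the underlying GraphicsAllocation stays alive for reuse.
        device->getDeviceTimestampPoolAllocator().freeSharedTimestampAllocation(chunk);
    }
}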
@@ -21,6 +21,7 @@
 #include "shared/source/memory_manager/memory_manager.h"
 #include "shared/source/memory_manager/memory_operations_handler.h"
 #include "shared/source/utilities/cpuintrinsics.h"
+#include "shared/source/utilities/timestamp_pool_allocator.h"
 #include "shared/source/utilities/wait_util.h"
 
 #include "level_zero/core/source/cmdlist/cmdlist.h"
@@ -116,17 +117,30 @@ ze_result_t EventPool::initialize(DriverHandle *driver, Context *context, uint32
     auto neoDevice = devices[0]->getNEODevice();
     if (this->isDeviceEventPoolAllocation) {
         this->isHostVisibleEventPoolAllocation = !(isEventPoolDeviceAllocationFlagSet());
-        NEO::AllocationProperties allocationProperties{*rootDeviceIndices.begin(), this->eventPoolSize, allocationType, neoDevice->getDeviceBitfield()};
-        allocationProperties.alignment = eventAlignment;
-
-        auto memoryManager = driver->getMemoryManager();
-        auto graphicsAllocation = memoryManager->allocateGraphicsMemoryWithProperties(allocationProperties);
-        if (graphicsAllocation) {
-            eventPoolAllocations->addAllocation(graphicsAllocation);
-            allocatedMemory = true;
-            if (isIpcPoolFlagSet()) {
-                uint64_t handle = 0;
-                this->isShareableEventMemory = (graphicsAllocation->peekInternalHandle(memoryManager, handle) == 0);
+        if (neoDevice->getDeviceTimestampPoolAllocator().isEnabled() &&
+            !isIpcPoolFlagSet()) {
+            auto sharedTsAlloc = neoDevice->getDeviceTimestampPoolAllocator().requestGraphicsAllocationForTimestamp(this->eventPoolSize);
+            if (sharedTsAlloc) {
+                this->sharedTimestampAllocation.reset(sharedTsAlloc);
+                eventPoolAllocations->addAllocation(this->sharedTimestampAllocation->getGraphicsAllocation());
+                allocatedMemory = true;
+            }
+        }
+
+        if (!allocatedMemory) {
+            NEO::AllocationProperties allocationProperties{*rootDeviceIndices.begin(), this->eventPoolSize, allocationType, neoDevice->getDeviceBitfield()};
+            allocationProperties.alignment = eventAlignment;
+
+            auto memoryManager = driver->getMemoryManager();
+            auto graphicsAllocation = memoryManager->allocateGraphicsMemoryWithProperties(allocationProperties);
+            if (graphicsAllocation) {
+                eventPoolAllocations->addAllocation(graphicsAllocation);
+                allocatedMemory = true;
+                if (isIpcPoolFlagSet()) {
+                    uint64_t handle = 0;
+                    this->isShareableEventMemory = (graphicsAllocation->peekInternalHandle(memoryManager, handle) == 0);
+                }
             }
         }
     } else {
@@ -156,10 +170,17 @@ EventPool::~EventPool() {
     if (eventPoolAllocations) {
         auto graphicsAllocations = eventPoolAllocations->getGraphicsAllocations();
         auto memoryManager = devices[0]->getDriverHandle()->getMemoryManager();
+        auto sharedTsAlloc = this->sharedTimestampAllocation ? this->sharedTimestampAllocation->getGraphicsAllocation() : nullptr;
         for (auto gpuAllocation : graphicsAllocations) {
-            memoryManager->freeGraphicsMemory(gpuAllocation);
+            if (gpuAllocation != sharedTsAlloc) {
+                memoryManager->freeGraphicsMemory(gpuAllocation);
+            }
         }
     }
+    if (this->sharedTimestampAllocation) {
+        auto neoDevice = devices[0]->getNEODevice();
+        neoDevice->getDeviceTimestampPoolAllocator().freeSharedTimestampAllocation(this->sharedTimestampAllocation.release());
+    }
 }
 
 ze_result_t EventPool::destroy() {
@@ -555,7 +576,7 @@ uint64_t Event::getGpuAddress(Device *device) const {
     if (!inOrderTimestampNode.empty()) {
         return inOrderTimestampNode.back()->getGpuAddress();
     }
-    return getAllocation(device)->getGpuAddress() + this->eventPoolOffset;
+    return getAllocation(device)->getGpuAddress() + this->eventPoolOffset + this->offsetInSharedAlloc;
 }
 
 void *Event::getHostAddress() const {
@@ -13,6 +13,7 @@
 #include "shared/source/helpers/timestamp_packet_container.h"
 #include "shared/source/memory_manager/multi_graphics_allocation.h"
 #include "shared/source/os_interface/os_time.h"
+#include "shared/source/utilities/timestamp_pool_allocator.h"
 
 #include "level_zero/core/source/helpers/api_handle_helper.h"
@@ -87,6 +88,7 @@ inline constexpr uint32_t eventPackets = maxKernelSplit * NEO ::TimestampPacketC
 struct EventDescriptor {
     NEO::MultiGraphicsAllocation *eventPoolAllocation = nullptr;
     const void *extensions = nullptr;
+    size_t offsetInSharedAlloc = 0;
     uint32_t totalEventSize = 0;
     uint32_t maxKernelCount = 0;
     uint32_t maxPacketsCount = 0;
@@ -335,6 +337,8 @@ struct Event : _ze_event_handle_t {
 
     virtual ze_result_t hostEventSetValue(State eventState) = 0;
 
+    size_t getOffsetInSharedAlloc() const { return offsetInSharedAlloc; }
+
   protected:
     Event(int index, Device *device) : device(device), index(index) {}
 
@@ -366,6 +370,7 @@ struct Event : _ze_event_handle_t {
     size_t timestampSizeInDw = 0u;
     size_t singlePacketSize = 0u;
     size_t eventPoolOffset = 0u;
+    size_t offsetInSharedAlloc = 0u;
 
     size_t cpuStartTimestamp = 0u;
     size_t gpuStartTimestamp = 0u;
@@ -432,6 +437,9 @@ struct EventPool : _ze_event_pool_handle_t {
     inline ze_event_pool_handle_t toHandle() { return this; }
 
     MOCKABLE_VIRTUAL NEO::MultiGraphicsAllocation &getAllocation() { return *eventPoolAllocations; }
+    std::unique_ptr<NEO::SharedTimestampAllocation> &getSharedTimestampAllocation() {
+        return sharedTimestampAllocation;
+    }
 
     uint32_t getEventSize() const { return eventSize; }
     void setEventSize(uint32_t size) { eventSize = size; }
@@ -485,6 +493,8 @@ struct EventPool : _ze_event_pool_handle_t {
     std::vector<Device *> devices;
 
     std::unique_ptr<NEO::MultiGraphicsAllocation> eventPoolAllocations;
+    std::unique_ptr<NEO::SharedTimestampAllocation> sharedTimestampAllocation;
 
     void *eventPoolPtr = nullptr;
     ContextImp *context = nullptr;
@@ -53,7 +53,8 @@ Event *Event::create(const EventDescriptor &eventDescriptor, Device *device, ze_
 
     event->totalEventSize = eventDescriptor.totalEventSize;
     event->eventPoolOffset = eventDescriptor.index * event->totalEventSize;
-    event->hostAddressFromPool = ptrOffset(baseHostAddress, event->eventPoolOffset);
+    event->offsetInSharedAlloc = eventDescriptor.offsetInSharedAlloc;
+    event->hostAddressFromPool = ptrOffset(baseHostAddress, event->eventPoolOffset + event->offsetInSharedAlloc);
     event->signalScope = eventDescriptor.signalScope;
 
     if (NEO::debugManager.flags.ForceHostSignalScope.get() == 1) {
@@ -138,6 +139,10 @@ Event *Event::create(EventPool *eventPool, const ze_event_desc_t *desc, Device *
         eventDescriptor.eventPoolAllocation = nullptr;
     }
 
+    if (eventPool->getSharedTimestampAllocation()) {
+        eventDescriptor.offsetInSharedAlloc = eventPool->getSharedTimestampAllocation()->getOffset();
+    }
+
     ze_result_t result = ZE_RESULT_SUCCESS;
 
     Event *event = Event::create<TagSizeT>(eventDescriptor, device, result);
@@ -24,7 +24,7 @@
 namespace NEO {
 struct KernelDescriptor;
 struct MetadataGeneration;
-class SharedIsaAllocation;
+class SharedPoolAllocation;
 
 namespace Zebin::Debug {
 struct Segments;
@@ -200,7 +200,7 @@ struct ModuleImp : public Module {
     std::unique_ptr<ModuleTranslationUnit> translationUnit;
     ModuleBuildLog *moduleBuildLog = nullptr;
     NEO::GraphicsAllocation *exportedFunctionsSurface = nullptr;
-    std::unique_ptr<NEO::SharedIsaAllocation> sharedIsaAllocation;
+    std::unique_ptr<NEO::SharedPoolAllocation> sharedIsaAllocation;
     std::vector<std::shared_ptr<Kernel>> printfKernelContainer;
     std::vector<std::unique_ptr<KernelImmutableData>> kernelImmDatas;
     NEO::Linker::RelocatedSymbolsMap symbols;
@@ -9,6 +9,7 @@
|
||||
#include "shared/source/gmm_helper/gmm.h"
|
||||
#include "shared/source/helpers/aligned_memory.h"
|
||||
#include "shared/source/memory_manager/gfx_partition.h"
|
||||
#include "shared/source/utilities/buffer_pool_allocator.inl"
|
||||
#include "shared/test/common/helpers/debug_manager_state_restore.h"
|
||||
#include "shared/test/common/helpers/engine_descriptor_helper.h"
|
||||
#include "shared/test/common/helpers/variable_backup.h"
|
||||
@@ -123,9 +124,15 @@ struct EventPoolFailTests : public ::testing::Test {
|
||||
devices.push_back(std::unique_ptr<NEO::Device>(neoDevice));
|
||||
driverHandle = std::make_unique<DriverHandleImp>();
|
||||
driverHandle->initialize(std::move(devices));
|
||||
prevMemoryManager = driverHandle->getMemoryManager();
|
||||
|
||||
prevMemoryManagerDriver = driverHandle->getMemoryManager();
|
||||
prevMemoryManagerExecEnv = neoDevice->executionEnvironment->memoryManager.release();
|
||||
|
||||
currMemoryManager = new MemoryManagerEventPoolFailMock(*neoDevice->executionEnvironment);
|
||||
|
||||
driverHandle->setMemoryManager(currMemoryManager);
|
||||
neoDevice->executionEnvironment->memoryManager.reset(currMemoryManager);
|
||||
|
||||
device = driverHandle->devices[0];
|
||||
|
||||
context = std::make_unique<ContextImp>(driverHandle.get());
|
||||
@@ -137,11 +144,19 @@ struct EventPoolFailTests : public ::testing::Test {
|
||||
}
|
||||
|
||||
void TearDown() override {
|
||||
driverHandle->setMemoryManager(prevMemoryManager);
|
||||
driverHandle->setMemoryManager(prevMemoryManagerDriver);
|
||||
|
||||
neoDevice->executionEnvironment->memoryManager.release();
|
||||
neoDevice->executionEnvironment->memoryManager.reset(prevMemoryManagerExecEnv);
|
||||
|
||||
delete currMemoryManager;
|
||||
}
|
||||
NEO::MemoryManager *prevMemoryManager = nullptr;
|
||||
|
||||
NEO::MemoryManager *prevMemoryManagerDriver = nullptr;
|
||||
NEO::MemoryManager *prevMemoryManagerExecEnv = nullptr;
|
||||
|
||||
NEO::MemoryManager *currMemoryManager = nullptr;
|
||||
|
||||
std::unique_ptr<DriverHandleImp> driverHandle;
|
||||
NEO::MockDevice *neoDevice = nullptr;
|
||||
L0::Device *device = nullptr;
|
||||
@@ -160,6 +175,21 @@ TEST_F(EventPoolFailTests, whenCreatingEventPoolAndAllocationFailsThenOutOfDevic
|
||||
EXPECT_EQ(res, ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY);
|
||||
}
|
||||
|
||||
TEST_F(EventPoolFailTests, givenEnabledTimestampPoolAllocatorWhenCreatingEventPoolAndAllocationFailsThenOutOfDeviceMemoryIsReturned) {
|
||||
DebugManagerStateRestore restorer;
|
||||
NEO::debugManager.flags.EnableTimestampPoolAllocator.set(1);
|
||||
|
||||
ze_event_pool_desc_t eventPoolDesc = {
|
||||
ZE_STRUCTURE_TYPE_EVENT_POOL_DESC,
|
||||
nullptr,
|
||||
ZE_EVENT_POOL_FLAG_KERNEL_TIMESTAMP,
|
||||
1};
|
||||
|
||||
ze_event_pool_handle_t eventPool = {};
|
||||
ze_result_t res = context->createEventPool(&eventPoolDesc, 0, nullptr, &eventPool);
|
||||
EXPECT_EQ(res, ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY);
|
||||
}
|
||||
|
||||
TEST_F(EventPoolCreate, GivenEventPoolThenAllocationContainsAtLeast16Bytes) {
|
||||
ze_event_pool_desc_t eventPoolDesc = {
|
||||
ZE_STRUCTURE_TYPE_EVENT_POOL_DESC,
|
||||
@@ -373,6 +403,34 @@ TEST_F(EventPoolCreate, GivenDeviceThenEventPoolIsCreated) {
|
||||
}
|
||||
eventPool->destroy();
|
||||
}
|
||||
|
||||
TEST_F(EventPoolCreate, GivenEnabledTimestampPoolAllocatorWhenCreatingEventPoolWithIpcFlagThenTimestampPoolAllocatorIsNotUsed) {
|
||||
DebugManagerStateRestore restorer;
|
||||
NEO::debugManager.flags.EnableTimestampPoolAllocator.set(1);
|
||||
|
||||
ze_event_pool_desc_t eventPoolDesc = {};
|
||||
eventPoolDesc.count = 1;
|
||||
|
||||
{
|
||||
eventPoolDesc.flags = ZE_EVENT_POOL_FLAG_IPC;
|
||||
ze_result_t result = ZE_RESULT_SUCCESS;
|
||||
std::unique_ptr<L0::EventPool> eventPool(EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, result));
|
||||
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
|
||||
ASSERT_NE(nullptr, eventPool);
|
||||
|
||||
EXPECT_FALSE(driverHandle->devices[0]->getNEODevice()->getDeviceTimestampPoolAllocator().isPoolBuffer(eventPool->getAllocation().getDefaultGraphicsAllocation()));
|
||||
}
|
||||
{
|
||||
eventPoolDesc.flags = ZE_EVENT_POOL_FLAG_KERNEL_TIMESTAMP;
|
||||
ze_result_t result = ZE_RESULT_SUCCESS;
|
||||
std::unique_ptr<L0::EventPool> eventPool(EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, result));
|
||||
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
|
||||
ASSERT_NE(nullptr, eventPool);
|
||||
|
||||
EXPECT_TRUE(driverHandle->devices[0]->getNEODevice()->getDeviceTimestampPoolAllocator().isPoolBuffer(eventPool->getAllocation().getDefaultGraphicsAllocation()));
|
||||
}
|
||||
}
|
||||
|
||||
struct EventPoolIpcMockGraphicsAllocation : public NEO::MockGraphicsAllocation {
|
||||
using NEO::MockGraphicsAllocation::MockGraphicsAllocation;
|
||||
|
||||
@@ -1613,6 +1671,190 @@ HWTEST2_F(EventCreate, givenPlatformNotSupportsMultTileWhenDebugKeyIsSetToUseCon
|
||||
event->destroy();
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
struct MockL0GfxCoreHelperAlwaysAllocateEventInLocalMemHw : L0::L0GfxCoreHelperHw<GfxFamily> {
|
||||
bool alwaysAllocateEventInLocalMem() const override { return true; }
|
||||
};
|
||||
|
||||
HWTEST_F(EventCreate, GivenEnabledTimestampPoolAllocatorAndForcedEventAllocateInLocalMemoryWhenCreatingMultipleEventPoolsForSingleDeviceThenEventsUseSharedAllocationAndHaveUniqueAddresses) {
|
||||
DebugManagerStateRestore restorer;
|
||||
NEO::debugManager.flags.EnableTimestampPoolAllocator.set(1);
|
||||
|
||||
MockL0GfxCoreHelperAlwaysAllocateEventInLocalMemHw<FamilyType> mockL0GfxCoreHelper{};
|
||||
std::unique_ptr<ApiGfxCoreHelper> l0GfxCoreHelperBackup(static_cast<ApiGfxCoreHelper *>(&mockL0GfxCoreHelper));
|
||||
device->getNEODevice()->getExecutionEnvironment()->rootDeviceEnvironments[0]->apiGfxCoreHelper.swap(l0GfxCoreHelperBackup);
|
||||
|
||||
ASSERT_TRUE(device->getNEODevice()->getDeviceTimestampPoolAllocator().isEnabled());
|
||||
|
||||
ze_device_handle_t devices[] = {device->toHandle()};
|
||||
|
||||
std::vector<std::unique_ptr<L0::EventPool>> eventPools;
|
||||
std::vector<std::unique_ptr<Event>> events;
|
||||
std::set<uint64_t> gpuAddresses;
|
||||
|
||||
constexpr size_t numEventPools = 5;
|
||||
constexpr size_t numEventsInPool = 2;
|
||||
constexpr size_t numEvents = numEventPools * numEventsInPool;
|
||||
|
||||
ze_event_pool_desc_t eventPoolDesc = {
|
||||
ZE_STRUCTURE_TYPE_EVENT_POOL_DESC,
|
||||
nullptr,
|
||||
ZE_EVENT_POOL_FLAG_HOST_VISIBLE,
|
||||
numEventsInPool};
|
||||
|
||||
ze_event_desc_t eventDesc = {
|
||||
ZE_STRUCTURE_TYPE_EVENT_DESC,
|
||||
nullptr,
|
||||
0,
|
||||
ZE_EVENT_SCOPE_FLAG_DEVICE,
|
||||
ZE_EVENT_SCOPE_FLAG_DEVICE};
|
||||
|
||||
ze_result_t result = ZE_RESULT_SUCCESS;
|
||||
|
||||
for (size_t i = 0; i < numEventPools; i++) {
|
||||
eventPools.emplace_back(EventPool::create(driverHandle.get(), context, 1, devices, &eventPoolDesc, result));
|
||||
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
|
||||
EXPECT_NE(nullptr, eventPools[i]);
|
||||
|
||||
for (size_t j = 0; j < numEventsInPool; j++) {
|
||||
eventDesc.index = static_cast<uint32_t>(j);
|
||||
events.emplace_back(static_cast<Event *>(getHelper<L0GfxCoreHelper>().createEvent(eventPools[i].get(), &eventDesc, device)));
|
||||
EXPECT_NE(nullptr, events.back());
|
||||
}
|
||||
}
|
||||
|
||||
const auto expectedSharedAllocation = events[0]->getAllocation(device);
|
||||
EXPECT_TRUE(device->getNEODevice()->getDeviceTimestampPoolAllocator().isPoolBuffer(expectedSharedAllocation));
|
||||
|
||||
for (auto &event : events) {
|
||||
EXPECT_EQ(expectedSharedAllocation, event->getAllocation(device));
|
||||
|
||||
uint64_t gpuAddress = event->getGpuAddress(device);
|
||||
auto [iterator, wasInserted] = gpuAddresses.insert(gpuAddress);
|
||||
EXPECT_TRUE(wasInserted) << "Duplicate GPU address found: " << std::hex << "0x" << gpuAddress;
|
||||
}
|
||||
|
||||
EXPECT_EQ(numEvents, gpuAddresses.size());
|
||||
|
||||
device->getNEODevice()->getExecutionEnvironment()->rootDeviceEnvironments[0]->apiGfxCoreHelper.swap(l0GfxCoreHelperBackup);
|
||||
l0GfxCoreHelperBackup.release();
|
||||
}
|
||||
|
||||
HWTEST_F(EventPoolCreateMultiDevice, GivenEnabledTimestampPoolAllocatorAndForcedLocalMemoryWhenCreatingEventPoolsForTwoDevicesThenEventsShareAllocationWithinDeviceButNotBetweenDevices) {
|
||||
DebugManagerStateRestore restorer;
|
||||
NEO::debugManager.flags.EnableTimestampPoolAllocator.set(1);
|
||||
|
||||
MockL0GfxCoreHelperAlwaysAllocateEventInLocalMemHw<FamilyType> mockL0GfxCoreHelper0{};
|
||||
MockL0GfxCoreHelperAlwaysAllocateEventInLocalMemHw<FamilyType> mockL0GfxCoreHelper1{};
|
||||
|
||||
std::unique_ptr<ApiGfxCoreHelper> l0GfxCoreHelperBackup0(static_cast<ApiGfxCoreHelper *>(&mockL0GfxCoreHelper0));
|
||||
std::unique_ptr<ApiGfxCoreHelper> l0GfxCoreHelperBackup1(static_cast<ApiGfxCoreHelper *>(&mockL0GfxCoreHelper1));
|
||||
|
||||
ASSERT_GE(driverHandle->devices.size(), 2u);
|
||||
|
||||
auto device0 = driverHandle->devices[0];
|
||||
auto device1 = driverHandle->devices[1];
|
||||
auto neoDevice0 = device0->getNEODevice();
|
||||
auto neoDevice1 = device1->getNEODevice();
|
||||
|
||||
ASSERT_TRUE(neoDevice0->getDeviceTimestampPoolAllocator().isEnabled());
|
||||
ASSERT_TRUE(neoDevice1->getDeviceTimestampPoolAllocator().isEnabled());
|
||||
|
||||
neoDevice0->getExecutionEnvironment()->rootDeviceEnvironments[0]->apiGfxCoreHelper.swap(l0GfxCoreHelperBackup0);
|
||||
neoDevice1->getExecutionEnvironment()->rootDeviceEnvironments[1]->apiGfxCoreHelper.swap(l0GfxCoreHelperBackup1);
|
||||
|
||||
std::vector<std::unique_ptr<L0::EventPool>> eventPoolsDevice0;
|
||||
std::vector<std::unique_ptr<L0::EventPool>> eventPoolsDevice1;
|
||||
std::vector<std::unique_ptr<Event>> eventsDevice0;
|
||||
std::vector<std::unique_ptr<Event>> eventsDevice1;
|
||||
std::set<uint64_t> gpuAddressesDevice0;
|
||||
std::set<uint64_t> gpuAddressesDevice1;
|
||||
|
||||
constexpr size_t numEventPools = 3;
|
||||
constexpr size_t numEventsInPool = 2;
|
||||
constexpr size_t numEvents = numEventPools * numEventsInPool;
|
||||
|
||||
ze_event_pool_desc_t eventPoolDesc = {
|
||||
ZE_STRUCTURE_TYPE_EVENT_POOL_DESC,
|
||||
nullptr,
|
||||
ZE_EVENT_POOL_FLAG_HOST_VISIBLE,
|
||||
numEventsInPool};
|
||||
|
||||
ze_event_desc_t eventDesc = {
|
||||
ZE_STRUCTURE_TYPE_EVENT_DESC,
|
||||
nullptr,
|
||||
0,
|
||||
ZE_EVENT_SCOPE_FLAG_DEVICE,
|
||||
ZE_EVENT_SCOPE_FLAG_DEVICE};
|
||||
|
||||
ze_result_t result = ZE_RESULT_SUCCESS;
|
||||
|
||||
auto &l0GfxCoreHelper = neoDevice0->getRootDeviceEnvironment().getHelper<L0GfxCoreHelper>();
|
||||
|
||||
// Create events for device0
|
||||
ze_device_handle_t devices0[] = {device0->toHandle()};
|
||||
for (size_t i = 0; i < numEventPools; i++) {
|
||||
eventPoolsDevice0.emplace_back(EventPool::create(driverHandle.get(), context, 1, devices0, &eventPoolDesc, result));
|
||||
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
|
||||
EXPECT_NE(nullptr, eventPoolsDevice0[i]);
|
||||
|
||||
for (size_t j = 0; j < numEventsInPool; j++) {
|
||||
eventDesc.index = static_cast<uint32_t>(j);
|
||||
eventsDevice0.emplace_back(static_cast<Event *>(l0GfxCoreHelper.createEvent(eventPoolsDevice0[i].get(), &eventDesc, device0)));
|
||||
EXPECT_NE(nullptr, eventsDevice0.back());
|
||||
}
|
||||
}
|
||||
|
||||
// Create events for device1
|
||||
ze_device_handle_t devices1[] = {device1->toHandle()};
|
||||
for (size_t i = 0; i < numEventPools; i++) {
|
||||
eventPoolsDevice1.emplace_back(EventPool::create(driverHandle.get(), context, 1, devices1, &eventPoolDesc, result));
|
||||
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
|
||||
EXPECT_NE(nullptr, eventPoolsDevice1[i]);
|
||||
|
||||
for (size_t j = 0; j < numEventsInPool; j++) {
|
||||
eventDesc.index = static_cast<uint32_t>(j);
|
||||
eventsDevice1.emplace_back(static_cast<Event *>(l0GfxCoreHelper.createEvent(eventPoolsDevice1[i].get(), &eventDesc, device1)));
|
||||
EXPECT_NE(nullptr, eventsDevice1.back());
|
||||
}
|
||||
}
|
||||
|
||||
// Verify allocations and GPU addresses for device0
|
||||
const auto expectedSharedAllocationDevice0 = eventsDevice0[0]->getAllocation(device0);
|
||||
EXPECT_TRUE(neoDevice0->getDeviceTimestampPoolAllocator().isPoolBuffer(expectedSharedAllocationDevice0));
|
||||
EXPECT_FALSE(neoDevice1->getDeviceTimestampPoolAllocator().isPoolBuffer(expectedSharedAllocationDevice0));
|
||||
|
||||
for (auto &event : eventsDevice0) {
|
||||
EXPECT_EQ(expectedSharedAllocationDevice0, event->getAllocation(device0));
|
||||
|
||||
uint64_t gpuAddress = event->getGpuAddress(device0);
|
||||
auto [iterator, wasInserted] = gpuAddressesDevice0.insert(gpuAddress);
|
||||
EXPECT_TRUE(wasInserted) << "Duplicate GPU address found for device0: " << std::hex << "0x" << gpuAddress;
|
||||
}
|
||||
|
||||
// Verify allocations and GPU addresses for device1
|
||||
const auto expectedSharedAllocationDevice1 = eventsDevice1[0]->getAllocation(device1);
|
||||
EXPECT_TRUE(neoDevice1->getDeviceTimestampPoolAllocator().isPoolBuffer(expectedSharedAllocationDevice1));
|
||||
EXPECT_FALSE(neoDevice0->getDeviceTimestampPoolAllocator().isPoolBuffer(expectedSharedAllocationDevice1));
|
||||
|
||||
for (auto &event : eventsDevice1) {
|
||||
EXPECT_EQ(expectedSharedAllocationDevice1, event->getAllocation(device1));
|
||||
|
||||
uint64_t gpuAddress = event->getGpuAddress(device1);
|
||||
auto [iterator, wasInserted] = gpuAddressesDevice1.insert(gpuAddress);
|
||||
EXPECT_TRUE(wasInserted) << "Duplicate GPU address found for device1: " << std::hex << "0x" << gpuAddress;
|
||||
}
|
||||
|
||||
EXPECT_NE(expectedSharedAllocationDevice0, expectedSharedAllocationDevice1);
|
||||
EXPECT_EQ(numEvents, gpuAddressesDevice0.size());
|
||||
EXPECT_EQ(numEvents, gpuAddressesDevice1.size());
|
||||
|
||||
neoDevice0->getExecutionEnvironment()->rootDeviceEnvironments[0]->apiGfxCoreHelper.swap(l0GfxCoreHelperBackup0);
|
||||
neoDevice1->getExecutionEnvironment()->rootDeviceEnvironments[1]->apiGfxCoreHelper.swap(l0GfxCoreHelperBackup1);
|
||||
l0GfxCoreHelperBackup0.release();
|
||||
l0GfxCoreHelperBackup1.release();
|
||||
}
|
||||
|
||||
using EventSynchronizeTest = Test<EventFixture<1, 0>>;
|
||||
using EventUsedPacketSignalSynchronizeTest = Test<EventUsedPacketSignalFixture<1, 0, 0, -1>>;
|
||||
|
||||
@@ -3853,6 +4095,7 @@ HWTEST_F(EventTests, GivenCsrTbxModeWhenEventCreatedAndSignaledThenEventAllocati
|
||||
|
||||
auto event = whiteboxCast(getHelper<L0GfxCoreHelper>().createEvent(eventPool.get(), &eventDesc, device));
|
||||
auto eventAllocation = event->getAllocation(device);
|
||||
auto offsetInSharedAlloc = event->getOffsetInSharedAlloc();
|
||||
|
||||
EXPECT_TRUE(eventAllocation->getAubInfo().writeMemoryOnly);
|
||||
|
||||
@@ -3868,7 +4111,7 @@ HWTEST_F(EventTests, GivenCsrTbxModeWhenEventCreatedAndSignaledThenEventAllocati
|
||||
EXPECT_EQ(eventAllocation, ultCsr.writeMemoryParams.latestGfxAllocation);
|
||||
EXPECT_TRUE(ultCsr.writeMemoryParams.latestChunkedMode);
|
||||
EXPECT_EQ(sizeof(uint64_t) * expectedCallCount, ultCsr.writeMemoryParams.latestChunkSize);
|
||||
EXPECT_EQ(0u, ultCsr.writeMemoryParams.latestGpuVaChunkOffset);
|
||||
EXPECT_EQ(0u + offsetInSharedAlloc, ultCsr.writeMemoryParams.latestGpuVaChunkOffset);
|
||||
EXPECT_FALSE(eventAllocation->isTbxWritable(expectedBanks));
|
||||
|
||||
auto status = event->hostSignal(false);
|
||||
@@ -3879,7 +4122,7 @@ HWTEST_F(EventTests, GivenCsrTbxModeWhenEventCreatedAndSignaledThenEventAllocati
|
||||
EXPECT_EQ(eventAllocation, ultCsr.writeMemoryParams.latestGfxAllocation);
|
||||
EXPECT_TRUE(ultCsr.writeMemoryParams.latestChunkedMode);
|
||||
EXPECT_EQ(event->getSinglePacketSize(), ultCsr.writeMemoryParams.latestChunkSize);
|
||||
EXPECT_EQ(0u, ultCsr.writeMemoryParams.latestGpuVaChunkOffset);
|
||||
EXPECT_EQ(0u + offsetInSharedAlloc, ultCsr.writeMemoryParams.latestGpuVaChunkOffset);
|
||||
|
||||
EXPECT_FALSE(eventAllocation->isTbxWritable(expectedBanks));
|
||||
|
||||
@@ -3899,7 +4142,7 @@ HWTEST_F(EventTests, GivenCsrTbxModeWhenEventCreatedAndSignaledThenEventAllocati
|
||||
EXPECT_EQ(eventAllocation, ultCsr.writeMemoryParams.latestGfxAllocation);
|
||||
EXPECT_TRUE(ultCsr.writeMemoryParams.latestChunkedMode);
|
||||
EXPECT_EQ(event->getSinglePacketSize(), ultCsr.writeMemoryParams.latestChunkSize);
|
||||
EXPECT_EQ(0u, ultCsr.writeMemoryParams.latestGpuVaChunkOffset);
|
||||
EXPECT_EQ(0u + offsetInSharedAlloc, ultCsr.writeMemoryParams.latestGpuVaChunkOffset);
|
||||
|
||||
EXPECT_FALSE(eventAllocation->isTbxWritable(expectedBanks));
|
||||
|
||||
|
||||
@@ -250,6 +250,8 @@ HWTEST2_F(ExternalSemaphoreTest, givenAppendSignalEventFailsWhenAppendSignalExte
|
||||
}
|
||||
|
||||
HWTEST2_F(ExternalSemaphoreTest, givenFailingMemoryManagerWhenAppendSignalExternalSemaphoresExpIsCalledThenErrorIsReturned, MatchAny) {
|
||||
DebugManagerStateRestore restorer;
|
||||
|
||||
auto externalSemaphore = std::make_unique<ExternalSemaphoreImp>();
|
||||
auto failMemoryManager = std::make_unique<FailMemoryManager>();
|
||||
auto l0Device = std::make_unique<MockDeviceImp>(neoDevice, neoDevice->getExecutionEnvironment());
|
||||
@@ -269,6 +271,8 @@ HWTEST2_F(ExternalSemaphoreTest, givenFailingMemoryManagerWhenAppendSignalExtern
|
||||
cmdList.initialize(l0Device.get(), NEO::EngineGroupType::renderCompute, 0u);
|
||||
cmdList.setCmdListContext(context);
|
||||
|
||||
NEO::debugManager.flags.EnableTimestampPoolAllocator.set(0);
|
||||
|
||||
ze_external_semaphore_signal_params_ext_t signalParams = {};
|
||||
ze_external_semaphore_ext_handle_t hSemaphore = externalSemaphore->toHandle();
|
||||
ze_result_t result = cmdList.appendSignalExternalSemaphores(1, &hSemaphore, &signalParams, nullptr, 0, nullptr);
|
||||
@@ -400,6 +404,8 @@ HWTEST2_F(ExternalSemaphoreTest, givenAppendSignalEventFailsWhenAppendWaitExtern
|
||||
}
|
||||
|
||||
HWTEST2_F(ExternalSemaphoreTest, givenFailingMemoryManagerWhenAppendWaitExternalSemaphoresExpIsCalledThenErrorIsReturned, MatchAny) {
|
||||
DebugManagerStateRestore restorer;
|
||||
|
||||
auto externalSemaphore = std::make_unique<ExternalSemaphoreImp>();
|
||||
auto failMemoryManager = std::make_unique<FailMemoryManager>();
|
||||
auto l0Device = std::make_unique<MockDeviceImp>(neoDevice, neoDevice->getExecutionEnvironment());
|
||||
@@ -419,6 +425,8 @@ HWTEST2_F(ExternalSemaphoreTest, givenFailingMemoryManagerWhenAppendWaitExternal
|
||||
cmdList.initialize(l0Device.get(), NEO::EngineGroupType::renderCompute, 0u);
|
||||
cmdList.setCmdListContext(context);
|
||||
|
||||
NEO::debugManager.flags.EnableTimestampPoolAllocator.set(0);
|
||||
|
||||
ze_external_semaphore_wait_params_ext_t waitParams = {};
|
||||
ze_external_semaphore_ext_handle_t hSemaphore = externalSemaphore->toHandle();
|
||||
ze_result_t result = cmdList.appendWaitExternalSemaphores(1, &hSemaphore, &waitParams, nullptr, 0, nullptr);
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2021-2024 Intel Corporation
|
||||
* Copyright (C) 2021-2025 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
@@ -1602,6 +1602,8 @@ HWTEST2_F(CommandQueueCommandsXeHpc, givenSplitBcsCopyAndImmediateCommandListWhe
|
||||
EXPECT_EQ(static_cast<DeviceImp *>(testL0Device.get())->bcsSplit.events.createdFromLatestPool, 12u);
|
||||
|
||||
NEO::debugManager.flags.OverrideEventSynchronizeTimeout.set(0);
|
||||
NEO::debugManager.flags.EnableTimestampPoolAllocator.set(0);
|
||||
|
||||
auto memoryManager = reinterpret_cast<MockMemoryManager *>(static_cast<DeviceImp *>(testL0Device.get())->bcsSplit.device.getDriverHandle()->getMemoryManager());
|
||||
memoryManager->isMockHostMemoryManager = true;
|
||||
memoryManager->forceFailureInPrimaryAllocation = true;
|
||||
@@ -1651,6 +1653,8 @@ HWTEST2_F(CommandQueueCommandsXeHpc, givenSplitBcsCopyAndImmediateCommandListWhe
|
||||
EXPECT_EQ(static_cast<DeviceImp *>(testL0Device.get())->bcsSplit.events.createdFromLatestPool, 0u);
|
||||
|
||||
NEO::debugManager.flags.OverrideEventSynchronizeTimeout.set(0);
|
||||
NEO::debugManager.flags.EnableTimestampPoolAllocator.set(0);
|
||||
|
||||
auto memoryManager = reinterpret_cast<MockMemoryManager *>(static_cast<DeviceImp *>(testL0Device.get())->bcsSplit.device.getDriverHandle()->getMemoryManager());
|
||||
memoryManager->isMockHostMemoryManager = true;
|
||||
memoryManager->forceFailureInPrimaryAllocation = true;
|
||||
|
||||
@@ -302,6 +302,7 @@ DECLARE_DEBUG_VARIABLE(int32_t, ForceScratchAndMTPBufferSizeMode, -1, "-1: defau
 DECLARE_DEBUG_VARIABLE(int32_t, CFEStackIDControl, -1, "Set Stack ID Control in CFE_STATE on Xe2+, -1 - do not set")
 DECLARE_DEBUG_VARIABLE(int32_t, StandaloneInOrderTimestampAllocationEnabled, -1, "-1: default, 0: disabled, 1: enabled. If enabled, use internal allocations, instead of Event pool for timestamps")
 DECLARE_DEBUG_VARIABLE(int32_t, ClearStandaloneInOrderTimestampAllocation, -1, "-1: default, 0: disabled, 1: enabled. If clear allocation before sending to GPU")
+DECLARE_DEBUG_VARIABLE(int32_t, EnableTimestampPoolAllocator, -1, "-1: default, 0: disabled, 1: enabled. If enabled, timestamp allocations are pooled and reused across multiple event pools")
 DECLARE_DEBUG_VARIABLE(int32_t, ForceComputeWalkerPostSyncFlushWithWrite, -1, "-1: ignore. >=0: Force PostSync cache flush and override postSync immediate write address to given value")
 DECLARE_DEBUG_VARIABLE(int32_t, DeferStateInitSubmissionToFirstRegularUsage, -1, "-1: ignore, 0: disabled, 1: enabled. If set, instead of initializing at Device creation, submit initial state during first usage (eg. kernel submission)")
 DECLARE_DEBUG_VARIABLE(int32_t, ForceNonWalkerSplitMemoryCopy, -1, "-1: default, 0: disabled, 1: enabled. If set, memory copy will be executed as single byte copy Walker without performance optimizations")
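
Illustration (not part of the diff): the flag can be forced in unit tests through the debug manager, as the tests added by this patch do.

DebugManagerStateRestore restorer;                            // restores overridden flags on scope exit
NEO::debugManager.flags.EnableTimestampPoolAllocator.set(1);  // 1: force enable, 0: force disable, -1: product default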
@@ -43,7 +43,7 @@ extern CommandStreamReceiver *createCommandStream(ExecutionEnvironment &executio
                                            const DeviceBitfield deviceBitfield);
 
 Device::Device(ExecutionEnvironment *executionEnvironment, const uint32_t rootDeviceIndex)
-    : executionEnvironment(executionEnvironment), rootDeviceIndex(rootDeviceIndex), isaPoolAllocator(this) {
+    : executionEnvironment(executionEnvironment), rootDeviceIndex(rootDeviceIndex), isaPoolAllocator(this), deviceTimestampPoolAllocator(this) {
     this->executionEnvironment->incRefInternal();
     this->executionEnvironment->rootDeviceEnvironments[rootDeviceIndex]->setDummyBlitProperties(rootDeviceIndex);
     if (auto ailHelper = this->executionEnvironment->rootDeviceEnvironments[rootDeviceIndex]->getAILConfigurationHelper(); ailHelper && ailHelper->isAdjustMicrosecondResolutionRequired()) {
@@ -73,6 +73,7 @@ Device::~Device() {
 
     syncBufferHandler.reset();
     isaPoolAllocator.releasePools();
+    deviceTimestampPoolAllocator.releasePools();
     if (deviceUsmMemAllocPoolsManager) {
         deviceUsmMemAllocPoolsManager->cleanup();
     }
@@ -18,6 +18,7 @@
 #include "shared/source/os_interface/product_helper.h"
 #include "shared/source/utilities/isa_pool_allocator.h"
 #include "shared/source/utilities/reference_tracked_object.h"
+#include "shared/source/utilities/timestamp_pool_allocator.h"
 
 #include <array>
 #include <mutex>
@@ -201,6 +202,9 @@ class Device : public ReferenceTrackedObject<Device>, NEO::NonCopyableAndNonMova
     ISAPoolAllocator &getIsaPoolAllocator() {
         return isaPoolAllocator;
     }
+    TimestampPoolAllocator &getDeviceTimestampPoolAllocator() {
+        return deviceTimestampPoolAllocator;
+    }
     UsmMemAllocPoolsManager *getUsmMemAllocPoolsManager() {
         return deviceUsmMemAllocPoolsManager.get();
     }
@@ -325,6 +329,7 @@ class Device : public ReferenceTrackedObject<Device>, NEO::NonCopyableAndNonMova
     std::vector<RTDispatchGlobalsInfo *> rtDispatchGlobalsInfos;
 
     ISAPoolAllocator isaPoolAllocator;
+    TimestampPoolAllocator deviceTimestampPoolAllocator;
     std::unique_ptr<UsmMemAllocPoolsManager> deviceUsmMemAllocPoolsManager;
 
     std::atomic_uint32_t bufferPoolCount = 0u;
@@ -35,6 +35,7 @@ set(NEO_CORE_UTILITIES
     ${CMAKE_CURRENT_SOURCE_DIR}/perf_profiler.h
     ${CMAKE_CURRENT_SOURCE_DIR}/range.h
     ${CMAKE_CURRENT_SOURCE_DIR}/reference_tracked_object.h
+    ${CMAKE_CURRENT_SOURCE_DIR}/shared_pool_allocation.h
     ${CMAKE_CURRENT_SOURCE_DIR}/software_tags.cpp
     ${CMAKE_CURRENT_SOURCE_DIR}/software_tags.h
     ${CMAKE_CURRENT_SOURCE_DIR}/software_tags_manager.cpp
@@ -47,6 +48,8 @@ set(NEO_CORE_UTILITIES
     ${CMAKE_CURRENT_SOURCE_DIR}/tag_allocator.inl
     ${CMAKE_CURRENT_SOURCE_DIR}/time_measure_wrapper.h
     ${CMAKE_CURRENT_SOURCE_DIR}/timer_util.h
+    ${CMAKE_CURRENT_SOURCE_DIR}/timestamp_pool_allocator.cpp
+    ${CMAKE_CURRENT_SOURCE_DIR}/timestamp_pool_allocator.h
     ${CMAKE_CURRENT_SOURCE_DIR}/wait_util.cpp
     ${CMAKE_CURRENT_SOURCE_DIR}/wait_util.h
     ${CMAKE_CURRENT_SOURCE_DIR}/isa_pool_allocator.cpp
@@ -10,6 +10,7 @@
 #include "shared/source/helpers/constants.h"
 #include "shared/source/helpers/non_copyable_or_moveable.h"
 #include "shared/source/utilities/buffer_pool_allocator.h"
+#include "shared/source/utilities/shared_pool_allocation.h"
 
 #include <mutex>
 
@@ -17,33 +18,7 @@ namespace NEO {
 class GraphicsAllocation;
 class Device;
 
-class SharedIsaAllocation {
-  public:
-    SharedIsaAllocation(GraphicsAllocation *graphicsAllocation, size_t offset, size_t size, std::mutex *mtx)
-        : graphicsAllocation(graphicsAllocation), offset(offset), size(size), mtx(*mtx){};
-
-    GraphicsAllocation *getGraphicsAllocation() const {
-        return graphicsAllocation;
-    }
-
-    size_t getOffset() const {
-        return offset;
-    }
-
-    size_t getSize() const {
-        return size;
-    }
-
-    std::unique_lock<std::mutex> obtainSharedAllocationLock() {
-        return std::unique_lock<std::mutex>(mtx);
-    }
-
-  private:
-    GraphicsAllocation *graphicsAllocation;
-    const size_t offset;
-    const size_t size;
-    std::mutex &mtx; // This mutex is shared across all users of this GA
-};
+using SharedIsaAllocation = SharedPoolAllocation;
 
 // Each shared GA is maintained by single ISAPool
 class ISAPool : public AbstractBuffersPool<ISAPool, GraphicsAllocation> {
shared/source/utilities/shared_pool_allocation.h (new file, 43 lines)
@@ -0,0 +1,43 @@
/*
 * Copyright (C) 2025 Intel Corporation
 *
 * SPDX-License-Identifier: MIT
 *
 */

#pragma once

#include <mutex>

namespace NEO {
class GraphicsAllocation;

class SharedPoolAllocation {
  public:
    SharedPoolAllocation(GraphicsAllocation *graphicsAllocation, size_t offset, size_t size, std::mutex *mtx)
        : graphicsAllocation(graphicsAllocation), offset(offset), size(size), mtx(*mtx){};

    GraphicsAllocation *getGraphicsAllocation() const {
        return graphicsAllocation;
    }

    size_t getOffset() const {
        return offset;
    }

    size_t getSize() const {
        return size;
    }

    std::unique_lock<std::mutex> obtainSharedAllocationLock() {
        return std::unique_lock<std::mutex>(mtx);
    }

  private:
    GraphicsAllocation *graphicsAllocation;
    const size_t offset;
    const size_t size;
    std::mutex &mtx; // This mutex is shared across all users of this GA
};

} // namespace NEO
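
Illustration (not part of the diff): a hypothetical consumer of a SharedPoolAllocation takes the shared lock before operating on the sub-range it owns, since the underlying GraphicsAllocation is shared between chunks. The helper name below is made up for illustration.

// Hypothetical helper, for illustration only.
void touchPooledChunk(NEO::SharedPoolAllocation &chunk) {
    auto lock = chunk.obtainSharedAllocationLock(); // mutex shared by all users of this GraphicsAllocation
    auto *allocation = chunk.getGraphicsAllocation();
    const size_t begin = chunk.getOffset();
    const size_t end = begin + chunk.getSize();
    // ... operate on `allocation` strictly within [begin, end) ...
    (void)allocation;
    (void)end;
}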
shared/source/utilities/timestamp_pool_allocator.cpp (new file, 114 lines)
@@ -0,0 +1,114 @@
/*
 * Copyright (C) 2025 Intel Corporation
 *
 * SPDX-License-Identifier: MIT
 *
 */

#include "shared/source/utilities/timestamp_pool_allocator.h"

#include "shared/source/device/device.h"
#include "shared/source/helpers/aligned_memory.h"
#include "shared/source/memory_manager/allocation_properties.h"
#include "shared/source/memory_manager/memory_manager.h"
#include "shared/source/utilities/buffer_pool_allocator.inl"

namespace NEO {
TimestampPool::TimestampPool(Device *device, size_t poolSize)
    : BaseType(device->getMemoryManager(), nullptr), device(device) {
    DEBUG_BREAK_IF(device->getProductHelper().is2MBLocalMemAlignmentEnabled() &&
                   !isAligned(poolSize, MemoryConstants::pageSize2M));

    AllocationProperties properties{device->getRootDeviceIndex(),
                                    poolSize,
                                    AllocationType::gpuTimestampDeviceBuffer,
                                    device->getDeviceBitfield()};
    auto graphicsAllocation = memoryManager->allocateGraphicsMemoryWithProperties(properties);

    this->mainStorage.reset(graphicsAllocation);
    this->chunkAllocator.reset(new HeapAllocator(params.startingOffset, poolSize, MemoryConstants::pageSize, 0u));
    stackVec.push_back(graphicsAllocation);
    this->mtx = std::make_unique<std::mutex>();
}

TimestampPool::TimestampPool(TimestampPool &&pool) : BaseType(std::move(pool)) {
    mtx.reset(pool.mtx.release());
    this->stackVec = std::move(pool.stackVec);
    this->device = pool.device;
}

TimestampPool::~TimestampPool() {
    if (mainStorage) {
        device->getMemoryManager()->freeGraphicsMemory(mainStorage.release());
    }
}

SharedTimestampAllocation *TimestampPool::allocate(size_t size) {
    auto offset = static_cast<size_t>(this->chunkAllocator->allocate(size));
    if (offset == 0) {
        return nullptr;
    }
    return new SharedTimestampAllocation{this->mainStorage.get(), offset - params.startingOffset, size, mtx.get()};
}

const StackVec<GraphicsAllocation *, 1> &TimestampPool::getAllocationsVector() {
    return stackVec;
}

TimestampPoolAllocator::TimestampPoolAllocator(Device *device) : device(device) {}

bool TimestampPoolAllocator::isEnabled() const {
    if (NEO::debugManager.flags.EnableTimestampPoolAllocator.get() != -1) {
        return NEO::debugManager.flags.EnableTimestampPoolAllocator.get();
    }

    return device->getProductHelper().is2MBLocalMemAlignmentEnabled();
}

SharedTimestampAllocation *TimestampPoolAllocator::requestGraphicsAllocationForTimestamp(size_t size) {
    if (size > maxAllocationSize) {
        return nullptr;
    }

    std::lock_guard<std::mutex> lock(allocatorMtx);

    if (bufferPools.empty()) {
        addNewBufferPool(TimestampPool(device, alignToPoolSize(defaultPoolSize)));
    }

    auto allocFromPool = allocateFromPools(size);
    if (allocFromPool != nullptr) {
        return allocFromPool;
    }

    this->drain();

    allocFromPool = allocateFromPools(size);
    if (allocFromPool != nullptr) {
        return allocFromPool;
    }

    addNewBufferPool(TimestampPool(device, alignToPoolSize(defaultPoolSize)));
    return allocateFromPools(size);
}

void TimestampPoolAllocator::freeSharedTimestampAllocation(SharedTimestampAllocation *sharedTimestampAllocation) {
    std::unique_lock lock(allocatorMtx);
    tryFreeFromPoolBuffer(sharedTimestampAllocation->getGraphicsAllocation(), sharedTimestampAllocation->getOffset(), sharedTimestampAllocation->getSize());
    delete sharedTimestampAllocation;
}

SharedTimestampAllocation *TimestampPoolAllocator::allocateFromPools(size_t size) {
    for (auto &pool : bufferPools) {
        if (auto allocation = pool.allocate(size)) {
            return allocation;
        }
    }
    return nullptr;
}

size_t TimestampPoolAllocator::alignToPoolSize(size_t size) const {
    return alignUp(size, poolAlignment);
}

} // namespace NEO
shared/source/utilities/timestamp_pool_allocator.h (new file, 72 lines)
@@ -0,0 +1,72 @@
/*
 * Copyright (C) 2025 Intel Corporation
 *
 * SPDX-License-Identifier: MIT
 *
 */

#pragma once

#include "shared/source/helpers/constants.h"
#include "shared/source/helpers/non_copyable_or_moveable.h"
#include "shared/source/utilities/buffer_pool_allocator.h"
#include "shared/source/utilities/shared_pool_allocation.h"
#include "shared/source/utilities/stackvec.h"

#include <mutex>

namespace NEO {
class GraphicsAllocation;
class Device;

using SharedTimestampAllocation = SharedPoolAllocation;

class TimestampPool : public AbstractBuffersPool<TimestampPool, GraphicsAllocation> {
    using BaseType = AbstractBuffersPool<TimestampPool, GraphicsAllocation>;

  public:
    TimestampPool(Device *device, size_t poolSize);

    TimestampPool(const TimestampPool &) = delete;
    TimestampPool &operator=(const TimestampPool &) = delete;

    TimestampPool(TimestampPool &&pool);
    TimestampPool &operator=(TimestampPool &&) = delete;

    ~TimestampPool() override;

    SharedTimestampAllocation *allocate(size_t size);
    const StackVec<GraphicsAllocation *, 1> &getAllocationsVector();

  private:
    Device *device;
    StackVec<GraphicsAllocation *, 1> stackVec;
    std::unique_ptr<std::mutex> mtx;
};

class TimestampPoolAllocator : public AbstractBuffersAllocator<TimestampPool, GraphicsAllocation> {
  public:
    TimestampPoolAllocator(Device *device);

    bool isEnabled() const;

    SharedTimestampAllocation *requestGraphicsAllocationForTimestamp(size_t size);
    void freeSharedTimestampAllocation(SharedTimestampAllocation *sharedTimestampAllocation);

    size_t getDefaultPoolSize() const { return defaultPoolSize; }

  private:
    SharedTimestampAllocation *allocateFromPools(size_t size);
    size_t alignToPoolSize(size_t size) const;

    const size_t maxAllocationSize = 2 * MemoryConstants::megaByte;
    const size_t defaultPoolSize = 4 * MemoryConstants::megaByte;
    const size_t poolAlignment = MemoryConstants::pageSize2M;

    Device *device;
    std::mutex allocatorMtx;
};

static_assert(NEO::NonCopyable<TimestampPool>);

} // namespace NEO
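
Illustration (not part of the diff): condensed usage of the allocator declared above, mirroring what the unit tests added later in this patch exercise; "pDevice" stands for any NEO::Device with the feature enabled.

auto &allocator = pDevice->getDeviceTimestampPoolAllocator();
// Requests above maxAllocationSize (2 MB) are rejected and return nullptr.
auto *chunk = allocator.requestGraphicsAllocationForTimestamp(MemoryConstants::pageSize);
if (chunk != nullptr) {
    // Chunks handed out for one device share pooled gpuTimestampDeviceBuffer allocations;
    // each chunk is identified by its offset and size inside the pool buffer.
    auto gpuVa = chunk->getGraphicsAllocation()->getGpuAddress() + chunk->getOffset();
    static_cast<void>(gpuVa);
    allocator.freeSharedTimestampAllocation(chunk); // return the chunk to the pool for reuse
}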
@@ -661,6 +661,7 @@ LogIndirectDetectionKernelDetails = 0
 DirectSubmissionRelaxedOrderingCounterHeuristic = -1
 DirectSubmissionRelaxedOrderingCounterHeuristicTreshold = -1
 ClearStandaloneInOrderTimestampAllocation = -1
+EnableTimestampPoolAllocator = -1
 PipelinedEuThreadArbitration = -1
 ExperimentalUSMAllocationReuseCleaner = -1
 DummyPageBackingEnabled = 0
@@ -1,5 +1,5 @@
 #
-# Copyright (C) 2019-2024 Intel Corporation
+# Copyright (C) 2019-2025 Intel Corporation
 #
 # SPDX-License-Identifier: MIT
 #
@@ -26,6 +26,7 @@ target_sources(neo_shared_tests PRIVATE
     ${CMAKE_CURRENT_SOURCE_DIR}/spinlock_tests.cpp
     ${CMAKE_CURRENT_SOURCE_DIR}/tag_allocator_tests.cpp
     ${CMAKE_CURRENT_SOURCE_DIR}/timer_util_tests.cpp
+    ${CMAKE_CURRENT_SOURCE_DIR}/timestamp_pool_allocator_tests.cpp
     ${CMAKE_CURRENT_SOURCE_DIR}/vec_tests.cpp
     ${CMAKE_CURRENT_SOURCE_DIR}/wait_util_tests.cpp
     ${CMAKE_CURRENT_SOURCE_DIR}/isa_pool_allocator_tests.cpp
@@ -0,0 +1,166 @@
|
||||
/*
|
||||
* Copyright (C) 2025 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#include "shared/test/common/fixtures/device_fixture.h"
|
||||
#include "shared/test/common/mocks/mock_device.h"
|
||||
#include "shared/test/common/mocks/mock_memory_manager.h"
|
||||
#include "shared/test/common/mocks/mock_product_helper.h"
|
||||
#include "shared/test/common/test_macros/test.h"
|
||||
|
||||
#include "gtest/gtest.h"
|
||||
|
||||
using namespace NEO;
|
||||
|
||||
using TimestampPoolAllocatorTest = Test<DeviceFixture>;
|
||||
|
||||
namespace {
|
||||
void verifySharedTimestampAllocation(const SharedTimestampAllocation *sharedAllocation,
|
||||
size_t expectedOffset,
|
||||
size_t expectedSize) {
|
||||
ASSERT_NE(nullptr, sharedAllocation);
|
||||
EXPECT_NE(nullptr, sharedAllocation->getGraphicsAllocation());
|
||||
EXPECT_EQ(expectedOffset, sharedAllocation->getOffset());
|
||||
EXPECT_EQ(expectedSize, sharedAllocation->getSize());
|
||||
}
|
||||
} // namespace
|
||||
|
||||
TEST_F(TimestampPoolAllocatorTest, givenTimestampPoolAllocatorWhenNoAllocationsThenCreateNewAllocation) {
|
||||
auto ×tampAllocator = pDevice->getDeviceTimestampPoolAllocator();
|
||||
constexpr size_t requestAllocationSize = MemoryConstants::pageSize;
|
||||
|
||||
auto allocation = timestampAllocator.requestGraphicsAllocationForTimestamp(requestAllocationSize);
|
||||
verifySharedTimestampAllocation(allocation, 0ul, requestAllocationSize);
|
||||
EXPECT_EQ(AllocationType::gpuTimestampDeviceBuffer,
|
||||
allocation->getGraphicsAllocation()->getAllocationType());
|
||||
|
||||
timestampAllocator.freeSharedTimestampAllocation(allocation);
|
||||
}
|
||||
|
||||
TEST_F(TimestampPoolAllocatorTest, givenTimestampPoolAllocatorWhenAllocationsExistThenReuseAllocation) {
|
||||
auto ×tampAllocator = pDevice->getDeviceTimestampPoolAllocator();
|
||||
constexpr size_t requestAllocationSize = MemoryConstants::pageSize;
|
||||
|
||||
auto allocation = timestampAllocator.requestGraphicsAllocationForTimestamp(requestAllocationSize);
|
||||
verifySharedTimestampAllocation(allocation, 0ul, requestAllocationSize);
|
||||
|
||||
auto allocationSize = allocation->getGraphicsAllocation()->getUnderlyingBufferSize();
|
||||
auto numOfSharedAllocations = allocationSize / requestAllocationSize;
|
||||
|
||||
// Perform requests until allocation is full
|
||||
for (auto i = 1u; i < numOfSharedAllocations; i++) {
|
||||
auto tempSharedAllocation = timestampAllocator.requestGraphicsAllocationForTimestamp(requestAllocationSize);
|
||||
verifySharedTimestampAllocation(tempSharedAllocation, requestAllocationSize * i, requestAllocationSize);
|
||||
EXPECT_EQ(allocation->getGraphicsAllocation(), tempSharedAllocation->getGraphicsAllocation());
|
||||
timestampAllocator.freeSharedTimestampAllocation(tempSharedAllocation);
|
||||
}
|
||||
|
||||
// Verify that draining freed chunks is correct and allocation can be reused
|
||||
auto newAllocation = timestampAllocator.requestGraphicsAllocationForTimestamp(requestAllocationSize);
|
||||
verifySharedTimestampAllocation(newAllocation, requestAllocationSize, requestAllocationSize);
|
||||
EXPECT_EQ(allocation->getGraphicsAllocation(), newAllocation->getGraphicsAllocation());
|
||||
|
||||
timestampAllocator.freeSharedTimestampAllocation(newAllocation);
|
||||
timestampAllocator.freeSharedTimestampAllocation(allocation);
|
||||
}
|
||||
|
||||
TEST_F(TimestampPoolAllocatorTest, givenTimestampPoolAllocatorWhenPoolIsFullThenCreateNewPool) {
|
||||
// This test verifies that:
|
||||
// 1. First two allocations of size=poolSize/2 come from the same pool
|
||||
// 2. When pool becomes full (after two allocations), a new pool is created
|
||||
// 3. Third allocation comes from the new pool (different GraphicsAllocation)
|
||||
|
||||
auto ×tampAllocator = pDevice->getDeviceTimestampPoolAllocator();
|
||||
|
||||
// Request half of pool size to ensure exactly 2 allocations fit in one pool
|
||||
size_t requestAllocationSize = timestampAllocator.getDefaultPoolSize() / 2;
|
||||
|
||||
// First allocation - should come from first pool
|
||||
auto allocation1 = timestampAllocator.requestGraphicsAllocationForTimestamp(requestAllocationSize);
|
||||
verifySharedTimestampAllocation(allocation1, 0, requestAllocationSize);
|
||||
|
||||
// Second allocation - should come from first pool but with offset
|
||||
auto allocation2 = timestampAllocator.requestGraphicsAllocationForTimestamp(requestAllocationSize);
|
||||
verifySharedTimestampAllocation(allocation2, requestAllocationSize, requestAllocationSize);
|
||||
EXPECT_EQ(allocation1->getGraphicsAllocation(), allocation2->getGraphicsAllocation());
|
||||
|
||||
// Third allocation - should create new pool because first one is full
|
||||
auto allocation3 = timestampAllocator.requestGraphicsAllocationForTimestamp(requestAllocationSize);
|
||||
verifySharedTimestampAllocation(allocation3, 0, requestAllocationSize);
|
||||
EXPECT_NE(allocation1->getGraphicsAllocation(), allocation3->getGraphicsAllocation());
|
||||
|
||||
timestampAllocator.freeSharedTimestampAllocation(allocation1);
|
||||
timestampAllocator.freeSharedTimestampAllocation(allocation2);
|
||||
timestampAllocator.freeSharedTimestampAllocation(allocation3);
|
||||
}
|
||||
|
||||
TEST_F(TimestampPoolAllocatorTest, givenTimestampPoolAllocatorWhenRequestExceedsMaxSizeThenReturnNull) {
|
||||
auto ×tampAllocator = pDevice->getDeviceTimestampPoolAllocator();
|
||||
constexpr size_t requestAllocationSize = 3 * MemoryConstants::megaByte; // Larger than maxAllocationSize
|
||||
|
||||
auto allocation = timestampAllocator.requestGraphicsAllocationForTimestamp(requestAllocationSize);
|
||||
EXPECT_EQ(nullptr, allocation);
|
||||
}
|
||||
|
||||
TEST_F(TimestampPoolAllocatorTest, whenCheckingIsEnabledWithDifferentSettingsThenReturnsExpectedValue) {
|
||||
auto mockProductHelper = new MockProductHelper;
|
||||
pDevice->getRootDeviceEnvironmentRef().productHelper.reset(mockProductHelper);
|
||||
auto ×tampAllocator = pDevice->getDeviceTimestampPoolAllocator();
|
||||
|
||||
{
|
||||
debugManager.flags.EnableTimestampPoolAllocator.set(0);
|
||||
mockProductHelper->is2MBLocalMemAlignmentEnabledResult = true;
|
||||
|
||||
EXPECT_FALSE(timestampAllocator.isEnabled());
|
||||
}
|
||||
{
|
||||
debugManager.flags.EnableTimestampPoolAllocator.set(-1);
|
||||
mockProductHelper->is2MBLocalMemAlignmentEnabledResult = false;
|
||||
|
||||
EXPECT_FALSE(timestampAllocator.isEnabled());
|
||||
}
|
||||
{
|
||||
debugManager.flags.EnableTimestampPoolAllocator.set(1);
|
||||
mockProductHelper->is2MBLocalMemAlignmentEnabledResult = false;
|
||||
|
||||
EXPECT_TRUE(timestampAllocator.isEnabled());
|
||||
}
|
||||
{
|
||||
debugManager.flags.EnableTimestampPoolAllocator.set(-1);
|
||||
mockProductHelper->is2MBLocalMemAlignmentEnabledResult = true;
|
||||
|
||||
EXPECT_TRUE(timestampAllocator.isEnabled());
|
||||
}
|
||||
}
|
||||
|
||||
TEST_F(TimestampPoolAllocatorTest, givenTimestampPoolAllocatorWhenPoolSizeAlignmentRequestedThenReturnsAlignedSize) {
|
||||
auto ×tampAllocator = pDevice->getDeviceTimestampPoolAllocator();
|
||||
constexpr size_t requestAllocationSize = MemoryConstants::pageSize;
|
||||
|
||||
auto allocation = timestampAllocator.requestGraphicsAllocationForTimestamp(requestAllocationSize);
|
||||
ASSERT_NE(nullptr, allocation);
|
||||
|
||||
auto allocationSize = allocation->getGraphicsAllocation()->getUnderlyingBufferSize();
|
||||
EXPECT_EQ(0u, allocationSize % MemoryConstants::pageSize2M);
|
||||
|
||||
timestampAllocator.freeSharedTimestampAllocation(allocation);
|
||||
}
|
||||
|
||||
TEST_F(TimestampPoolAllocatorTest, givenFailingMemoryManagerWhenRequestingAllocationThenReturnNull) {
|
||||
auto ×tampAllocator = pDevice->getDeviceTimestampPoolAllocator();
|
||||
|
||||
auto memoryManager = static_cast<MockMemoryManager *>(pDevice->getMemoryManager());
|
||||
memoryManager->isMockHostMemoryManager = true;
|
||||
memoryManager->forceFailureInPrimaryAllocation = true;
|
||||
|
||||
size_t requestAllocationSize = timestampAllocator.getDefaultPoolSize() / 2;
|
||||
auto allocation = timestampAllocator.requestGraphicsAllocationForTimestamp(requestAllocationSize);
|
||||
EXPECT_EQ(nullptr, allocation);
|
||||
|
||||
if (allocation) {
|
||||
timestampAllocator.freeSharedTimestampAllocation(allocation);
|
||||
}
|
||||
}
|
||||