feature: initial support for sync dispatch token allocation

Related-To: NEO-8171

Signed-off-by: Bartosz Dunajski <bartosz.dunajski@intel.com>
This commit is contained in:
Bartosz Dunajski
2024-03-26 10:02:52 +00:00
committed by Compute-Runtime-Automation
parent 8acb4678fb
commit 74dcb5a9e3
12 changed files with 79 additions and 17 deletions

View File

@@ -56,7 +56,27 @@ uint32_t Device::getNextSyncDispatchQueueId() {
UNRECOVERABLE_IF(newValue == (std::numeric_limits<uint32_t>::max() - 1)); UNRECOVERABLE_IF(newValue == (std::numeric_limits<uint32_t>::max() - 1));
ensureSyncDispatchTokenAllocation();
return newValue; return newValue;
} }
// Lazily creates the device-wide sync dispatch token allocation.
//
// The first caller allocates one page of AllocationType::syncDispatchToken through
// the NEO memory manager and zero-fills it. Later callers find the pointer already
// set and return without allocating again.
//
// The mutex is acquired on every call: syncDispatchTokenAllocation is a plain
// (non-atomic) pointer, so checking it outside the lock while another thread stores
// it under the lock would be a data race. The buffer is cleared before the pointer
// is stored, so the member never refers to a token that has not been zeroed yet.
void Device::ensureSyncDispatchTokenAllocation() {
    std::lock_guard<std::mutex> lock(syncDispatchTokenMutex);

    if (syncDispatchTokenAllocation) {
        return; // already created by an earlier call
    }

    const NEO::AllocationProperties allocationProperties(getRootDeviceIndex(), true, MemoryConstants::pageSize,
                                                         NEO::AllocationType::syncDispatchToken,
                                                         true, false, getNEODevice()->getDeviceBitfield());

    auto tokenAllocation = getNEODevice()->getMemoryManager()->allocateGraphicsMemoryWithProperties(allocationProperties);
    UNRECOVERABLE_IF(tokenAllocation == nullptr);

    // Zero the token memory first, then store the pointer in the member.
    memset(tokenAllocation->getUnderlyingBuffer(), 0, tokenAllocation->getUnderlyingBufferSize());
    syncDispatchTokenAllocation = tokenAllocation;
}
} // namespace L0 } // namespace L0

View File

@@ -152,12 +152,15 @@ struct Device : _ze_device_handle_t {
NEO::TagAllocatorBase *getDeviceInOrderCounterAllocator(); NEO::TagAllocatorBase *getDeviceInOrderCounterAllocator();
NEO::TagAllocatorBase *getHostInOrderCounterAllocator(); NEO::TagAllocatorBase *getHostInOrderCounterAllocator();
uint32_t getNextSyncDispatchQueueId(); uint32_t getNextSyncDispatchQueueId();
void ensureSyncDispatchTokenAllocation();
protected: protected:
NEO::Device *neoDevice = nullptr; NEO::Device *neoDevice = nullptr;
std::unique_ptr<NEO::TagAllocatorBase> deviceInOrderCounterAllocator; std::unique_ptr<NEO::TagAllocatorBase> deviceInOrderCounterAllocator;
std::unique_ptr<NEO::TagAllocatorBase> hostInOrderCounterAllocator; std::unique_ptr<NEO::TagAllocatorBase> hostInOrderCounterAllocator;
NEO::GraphicsAllocation *syncDispatchTokenAllocation = nullptr;
std::mutex inOrderAllocatorMutex; std::mutex inOrderAllocatorMutex;
std::mutex syncDispatchTokenMutex;
std::atomic<uint32_t> syncDispatchQueueIdAllocator = 0; std::atomic<uint32_t> syncDispatchQueueIdAllocator = 0;
bool implicitScalingCapable = false; bool implicitScalingCapable = false;
}; };

View File

@@ -1438,6 +1438,8 @@ void DeviceImp::releaseResources() {
UNRECOVERABLE_IF(neoDevice == nullptr); UNRECOVERABLE_IF(neoDevice == nullptr);
getNEODevice()->getMemoryManager()->freeGraphicsMemory(syncDispatchTokenAllocation);
this->bcsSplit.releaseResources(); this->bcsSplit.releaseResources();
if (neoDevice->getExecutionEnvironment()->rootDeviceEnvironments[neoDevice->getRootDeviceIndex()]->debugger.get()) { if (neoDevice->getExecutionEnvironment()->rootDeviceEnvironments[neoDevice->getRootDeviceIndex()]->debugger.get()) {

View File

@@ -109,6 +109,7 @@ struct MockDeviceImp : public L0::DeviceImp {
using Base::implicitScalingCapable; using Base::implicitScalingCapable;
using Base::neoDevice; using Base::neoDevice;
using Base::subDeviceCopyEngineGroups; using Base::subDeviceCopyEngineGroups;
using Base::syncDispatchTokenAllocation;
MockDeviceImp(NEO::Device *device, NEO::ExecutionEnvironment *execEnv) { MockDeviceImp(NEO::Device *device, NEO::ExecutionEnvironment *execEnv) {
device->incRefInternal(); device->incRefInternal();

View File

@@ -5936,5 +5936,27 @@ HWTEST2_F(MultiTileSynchronizedDispatchTests, givenMultiTileSyncDispatchQueueWhe
EXPECT_EQ(3u, immCmdList1->syncDispatchQueueId); EXPECT_EQ(3u, immCmdList1->syncDispatchQueueId);
EXPECT_EQ(NEO::SynchronizedDispatchMode::full, immCmdList1->synchronizedDispatchMode); EXPECT_EQ(NEO::SynchronizedDispatchMode::full, immCmdList1->synchronizedDispatchMode);
} }
// Checks that creating a multi-tile immediate command list with synchronized dispatch
// forced on makes the device create its sync dispatch token allocation, and that
// creating a second command list keeps the same allocation.
HWTEST2_F(MultiTileSynchronizedDispatchTests, givenSyncDispatchEnabledWhenAllocatingQueueIdThenEnsureTokenAllocation, IsAtLeastSkl) {
    NEO::debugManager.flags.ForceSynchronizedDispatchMode.set(1);

    auto mockDevice = static_cast<MockDeviceImp *>(device);

    // No command list has been created in this test yet, so no token is expected.
    EXPECT_EQ(nullptr, mockDevice->syncDispatchTokenAllocation);

    auto immCmdList = createMultiTileImmCmdList<gfxCoreFamily>();
    EXPECT_EQ(NEO::SynchronizedDispatchMode::full, immCmdList->synchronizedDispatchMode);

    auto syncAllocation = mockDevice->syncDispatchTokenAllocation;
    // Fatal assertion: the next check dereferences the pointer, so the test must stop
    // here instead of crashing when the allocation is missing.
    ASSERT_NE(nullptr, syncAllocation);
    EXPECT_EQ(syncAllocation->getAllocationType(), NEO::AllocationType::syncDispatchToken);

    // A second command list must not replace the existing token allocation.
    immCmdList = createMultiTileImmCmdList<gfxCoreFamily>();
    EXPECT_EQ(NEO::SynchronizedDispatchMode::full, immCmdList->synchronizedDispatchMode);
    EXPECT_EQ(mockDevice->syncDispatchTokenAllocation, syncAllocation);
}
} // namespace ult } // namespace ult
} // namespace L0 } // namespace L0

View File

@@ -1,5 +1,5 @@
/* /*
* Copyright (C) 2021-2023 Intel Corporation * Copyright (C) 2021-2024 Intel Corporation
* *
* SPDX-License-Identifier: MIT * SPDX-License-Identifier: MIT
* *
@@ -116,6 +116,8 @@ const char *AppResourceHelper::getResourceTagStr(AllocationType type) {
return "TSKLIST"; return "TSKLIST";
case AllocationType::assertBuffer: case AllocationType::assertBuffer:
return "ASSRTBUF"; return "ASSRTBUF";
case AllocationType::syncDispatchToken:
return "SYNCTOK";
default: default:
return "NOTFOUND"; return "NOTFOUND";
} }

View File

@@ -1,5 +1,5 @@
/* /*
* Copyright (C) 2022-2023 Intel Corporation * Copyright (C) 2022-2024 Intel Corporation
* *
* SPDX-License-Identifier: MIT * SPDX-License-Identifier: MIT
* *
@@ -58,6 +58,7 @@ enum class AllocationType {
swTagBuffer, swTagBuffer,
deferredTasksList, deferredTasksList,
assertBuffer, assertBuffer,
syncDispatchToken,
count count
}; };

View File

@@ -416,6 +416,7 @@ bool MemoryManager::getAllocationData(AllocationData &allocationData, const Allo
case AllocationType::svmZeroCopy: case AllocationType::svmZeroCopy:
case AllocationType::gpuTimestampDeviceBuffer: case AllocationType::gpuTimestampDeviceBuffer:
case AllocationType::preemption: case AllocationType::preemption:
case AllocationType::syncDispatchToken:
allow64KbPages = true; allow64KbPages = true;
default: default:
break; break;
@@ -501,6 +502,7 @@ bool MemoryManager::getAllocationData(AllocationData &allocationData, const Allo
case AllocationType::debugModuleArea: case AllocationType::debugModuleArea:
case AllocationType::gpuTimestampDeviceBuffer: case AllocationType::gpuTimestampDeviceBuffer:
case AllocationType::semaphoreBuffer: case AllocationType::semaphoreBuffer:
case AllocationType::syncDispatchToken:
allocationData.flags.resource48Bit = true; allocationData.flags.resource48Bit = true;
break; break;
default: default:

View File

@@ -246,6 +246,8 @@ const char *getAllocationTypeString(GraphicsAllocation const *graphicsAllocation
return "DEFERRED_TASKS_LIST"; return "DEFERRED_TASKS_LIST";
case AllocationType::assertBuffer: case AllocationType::assertBuffer:
return "ASSERT_BUFFER"; return "ASSERT_BUFFER";
case AllocationType::syncDispatchToken:
return "SYNC_DISPATCH_TOKEN";
default: default:
return "ILLEGAL_VALUE"; return "ILLEGAL_VALUE";
} }

View File

@@ -1,5 +1,5 @@
/* /*
* Copyright (C) 2021-2023 Intel Corporation * Copyright (C) 2021-2024 Intel Corporation
* *
* SPDX-License-Identifier: MIT * SPDX-License-Identifier: MIT
* *
@@ -101,7 +101,8 @@ AllocationTypeTagTestCase allocationTypeTagValues[static_cast<int>(AllocationTyp
{AllocationType::gpuTimestampDeviceBuffer, "GPUTSDBF"}, {AllocationType::gpuTimestampDeviceBuffer, "GPUTSDBF"},
{AllocationType::swTagBuffer, "SWTAGBF"}, {AllocationType::swTagBuffer, "SWTAGBF"},
{AllocationType::deferredTasksList, "TSKLIST"}, {AllocationType::deferredTasksList, "TSKLIST"},
{AllocationType::assertBuffer, "ASSRTBUF"}}; {AllocationType::assertBuffer, "ASSRTBUF"},
{AllocationType::syncDispatchToken, "SYNCTOK"}};
class AllocationTypeTagString : public ::testing::TestWithParam<AllocationTypeTagTestCase> {}; class AllocationTypeTagString : public ::testing::TestWithParam<AllocationTypeTagTestCase> {};
TEST_P(AllocationTypeTagString, givenGraphicsAllocationTypeWhenCopyTagToStorageInfoThenCorrectTagIsReturned) { TEST_P(AllocationTypeTagString, givenGraphicsAllocationTypeWhenCopyTagToStorageInfoThenCorrectTagIsReturned) {

View File

@@ -515,6 +515,15 @@ TEST(MemoryManagerTest, givenPreemptionTypeWhenGetAllocationDataIsCalledThenSyst
EXPECT_FALSE(allocData.flags.useSystemMemory); EXPECT_FALSE(allocData.flags.useSystemMemory);
} }
// Verifies the allocation data produced for AllocationType::syncDispatchToken:
// system memory must not be requested and 64KB pages must be allowed.
TEST(MemoryManagerTest, givenSyncTokenTypeWhenGetAllocationDataIsCalledThenSystemMemoryIsNotRequestedAndAllow64kPages) {
    AllocationData syncTokenAllocData;
    MockMemoryManager memoryManager;
    AllocationProperties syncTokenProperties{mockRootDeviceIndex, 1, AllocationType::syncDispatchToken, mockDeviceBitfield};

    // Storage info is built from the same properties that are handed to getAllocationData.
    auto syncTokenStorageInfo = memoryManager.createStorageInfoFromProperties(syncTokenProperties);
    memoryManager.getAllocationData(syncTokenAllocData, syncTokenProperties, nullptr, syncTokenStorageInfo);

    EXPECT_FALSE(syncTokenAllocData.flags.useSystemMemory);
    EXPECT_TRUE(syncTokenAllocData.flags.allow64kbPages);
}
TEST(MemoryManagerTest, givenPreemptionTypeWhenGetAllocationDataIsCalledThen64kbPagesAllowed) { TEST(MemoryManagerTest, givenPreemptionTypeWhenGetAllocationDataIsCalledThen64kbPagesAllowed) {
AllocationData allocData; AllocationData allocData;
MockMemoryManager mockMemoryManager; MockMemoryManager mockMemoryManager;
@@ -523,20 +532,15 @@ TEST(MemoryManagerTest, givenPreemptionTypeWhenGetAllocationDataIsCalledThen64kb
EXPECT_TRUE(allocData.flags.allow64kbPages); EXPECT_TRUE(allocData.flags.allow64kbPages);
} }
TEST(MemoryManagerTest, givenPreemptionTypeWhenGetAllocationDataIsCalledThen48BitResourceIsTrue) { TEST(MemoryManagerTest, givenAllocationTypeWhenGetAllocationDataIsCalledThen48BitResourceIsTrue) {
AllocationData allocData;
MockMemoryManager mockMemoryManager; MockMemoryManager mockMemoryManager;
AllocationProperties properties{mockRootDeviceIndex, 1, AllocationType::preemption, mockDeviceBitfield};
mockMemoryManager.getAllocationData(allocData, properties, nullptr, mockMemoryManager.createStorageInfoFromProperties(properties));
EXPECT_TRUE(allocData.flags.resource48Bit);
}
TEST(MemoryManagerTest, givenDeferredTasksListTypeWhenGetAllocationDataIsCalledThen48BitResourceIsTrue) { for (auto &type : {AllocationType::preemption, AllocationType::deferredTasksList, AllocationType::syncDispatchToken}) {
AllocationData allocData; AllocationData allocData;
MockMemoryManager mockMemoryManager; AllocationProperties properties{mockRootDeviceIndex, 1, type, mockDeviceBitfield};
AllocationProperties properties{mockRootDeviceIndex, 1, AllocationType::deferredTasksList, mockDeviceBitfield}; mockMemoryManager.getAllocationData(allocData, properties, nullptr, mockMemoryManager.createStorageInfoFromProperties(properties));
mockMemoryManager.getAllocationData(allocData, properties, nullptr, mockMemoryManager.createStorageInfoFromProperties(properties)); EXPECT_TRUE(allocData.flags.resource48Bit);
EXPECT_TRUE(allocData.flags.resource48Bit); }
} }
TEST(MemoryManagerTest, givenMCSTypeWhenGetAllocationDataIsCalledThenSystemMemoryIsRequested) { TEST(MemoryManagerTest, givenMCSTypeWhenGetAllocationDataIsCalledThenSystemMemoryIsRequested) {
@@ -1090,6 +1094,7 @@ static const AllocationType allocationHaveToBeForcedTo48Bit[] = {
AllocationType::timestampPacketTagBuffer, AllocationType::timestampPacketTagBuffer,
AllocationType::semaphoreBuffer, AllocationType::semaphoreBuffer,
AllocationType::deferredTasksList, AllocationType::deferredTasksList,
AllocationType::syncDispatchToken,
}; };
static const AllocationType allocationsOptionallyForcedTo48Bit[] = { static const AllocationType allocationsOptionallyForcedTo48Bit[] = {

View File

@@ -576,7 +576,8 @@ TEST_F(WddmMemoryManagerTests, givenTypeWhenCallIsStatelessAccessRequiredThenPro
AllocationType::gpuTimestampDeviceBuffer, AllocationType::gpuTimestampDeviceBuffer,
AllocationType::swTagBuffer, AllocationType::swTagBuffer,
AllocationType::deferredTasksList, AllocationType::deferredTasksList,
AllocationType::assertBuffer}) { AllocationType::assertBuffer,
AllocationType::syncDispatchToken}) {
EXPECT_FALSE(wddmMemoryManager->isStatelessAccessRequired(type)); EXPECT_FALSE(wddmMemoryManager->isStatelessAccessRequired(type));
} }
} }