refactor: add helper to set default split mode
Signed-off-by: Bartosz Dunajski <bartosz.dunajski@intel.com>
Commit 1f734d6f6a (parent 42ab9b5829), committed by Compute-Runtime-Automation.
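In short, this refactor moves the choice of the default BCS-split aggregated-events mode out of the Events constructor and behind a new L0GfxCoreHelper virtual, bcsSplitAggregatedModeEnabled(). BcsSplit::setupDevice() now takes the default from that helper and only then applies the SplitBcsAggregatedEventsMode debug-flag override. The standalone sketch below models that flow; the class and flag names mirror the diff, but the scaffolding (the DebugFlag struct and the main() driver) is simplified for illustration and is not the driver's actual code.

// Simplified model of the pattern introduced by this commit: the per-platform
// helper supplies the default, and a debug flag (-1 == "not set") may override it.
#include <cstdio>

struct L0GfxCoreHelper {                     // stands in for L0::L0GfxCoreHelper
    virtual ~L0GfxCoreHelper() = default;
    virtual bool bcsSplitAggregatedModeEnabled() const = 0;
};

struct L0GfxCoreHelperHw : L0GfxCoreHelper { // common default, as in the .inl change below
    bool bcsSplitAggregatedModeEnabled() const override { return false; }
};

struct DebugFlag {                           // stands in for NEO::debugManager.flags.*
    int value = -1;                          // -1 means "not set by the user"
    int get() const { return value; }
};

struct BcsSplitEvents {
    bool aggregatedEventsMode = false;
};

// Mirrors the new logic in BcsSplit::setupDevice(): helper default first, flag override second.
void setupAggregatedEventsMode(BcsSplitEvents &events, const L0GfxCoreHelper &helper, const DebugFlag &splitBcsAggregatedEventsMode) {
    events.aggregatedEventsMode = helper.bcsSplitAggregatedModeEnabled();

    if (splitBcsAggregatedEventsMode.get() != -1) {
        events.aggregatedEventsMode = !!splitBcsAggregatedEventsMode.get();
    }
}

int main() {
    L0GfxCoreHelperHw helper;
    BcsSplitEvents events;
    DebugFlag flag;

    setupAggregatedEventsMode(events, helper, flag);
    std::printf("default from helper: %d\n", static_cast<int>(events.aggregatedEventsMode));

    flag.value = 1; // user forces aggregated mode via SplitBcsAggregatedEventsMode
    setupAggregatedEventsMode(events, helper, flag);
    std::printf("after flag override: %d\n", static_cast<int>(events.aggregatedEventsMode));
    return 0;
}

The hunks below are the actual change.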
@@ -13,6 +13,7 @@
 #include "level_zero/core/source/device/device_imp.h"
 #include "level_zero/core/source/driver/driver_handle.h"
+#include "level_zero/core/source/gfx_core_helpers/l0_gfx_core_helper.h"
 #include "level_zero/driver_experimental/zex_api.h"
 
 namespace L0 {
@@ -44,6 +45,12 @@ bool BcsSplit::setupDevice(uint32_t productFamily, bool internalUsage, const ze_
         return true;
     }
 
+    events.aggregatedEventsMode = device.getL0GfxCoreHelper().bcsSplitAggregatedModeEnabled();
+
+    if (NEO::debugManager.flags.SplitBcsAggregatedEventsMode.get() != -1) {
+        events.aggregatedEventsMode = !!NEO::debugManager.flags.SplitBcsAggregatedEventsMode.get();
+    }
+
     setupEnginesMask(bcsSplitSettings);
 
     return setupQueues(bcsSplitSettings, productFamily);
@@ -134,12 +141,6 @@ std::vector<CommandQueue *> &BcsSplit::getCmdQsForSplit(NEO::TransferDirection d
     return this->cmdQs;
 }
 
-BcsSplit::Events::Events(BcsSplit &bcsSplit) : bcsSplit(bcsSplit) {
-    if (NEO::debugManager.flags.SplitBcsAggregatedEventsMode.get() != -1) {
-        aggregatedEventsMode = !!NEO::debugManager.flags.SplitBcsAggregatedEventsMode.get();
-    }
-};
-
 size_t BcsSplit::Events::obtainAggregatedEventsForSplit(Context *context) {
     for (size_t i = 0; i < this->marker.size(); i++) {
         if (this->marker[i]->queryStatus() == ZE_RESULT_SUCCESS) {
@@ -58,7 +58,7 @@ struct BcsSplit {
         size_t createAggregatedEvent(Context *context);
         uint64_t *getNextAllocationForAggregatedEvent();
 
-        Events(BcsSplit &bcsSplit);
+        Events(BcsSplit &bcsSplit) : bcsSplit(bcsSplit) {}
     } events;
 
     std::vector<CommandQueue *> cmdQs;
@@ -116,6 +116,7 @@ class L0GfxCoreHelper : public NEO::ApiGfxCoreHelper {
     virtual uint64_t getOaTimestampValidBits() const = 0;
     virtual CopyOffloadMode getDefaultCopyOffloadMode(bool additionalBlitPropertiesSupported) const = 0;
     virtual bool isDefaultCmdListWithCopyOffloadSupported(bool additionalBlitPropertiesSupported) const = 0;
+    virtual bool bcsSplitAggregatedModeEnabled() const = 0;
 
   protected:
     L0GfxCoreHelper() = default;
@@ -174,6 +175,7 @@ class L0GfxCoreHelperHw : public L0GfxCoreHelper {
     uint64_t getOaTimestampValidBits() const override;
     CopyOffloadMode getDefaultCopyOffloadMode(bool additionalBlitPropertiesSupported) const override;
     bool isDefaultCmdListWithCopyOffloadSupported(bool additionalBlitPropertiesSupported) const override;
+    bool bcsSplitAggregatedModeEnabled() const override;
 
   protected:
     L0GfxCoreHelperHw() = default;
@@ -110,4 +110,9 @@ bool L0GfxCoreHelperHw<Family>::isDefaultCmdListWithCopyOffloadSupported(bool ad
     return (NEO::debugManager.flags.ForceCopyOperationOffloadForComputeCmdList.get() == 2);
 }
 
+template <typename Family>
+bool L0GfxCoreHelperHw<Family>::bcsSplitAggregatedModeEnabled() const {
+    return false;
+}
+
 } // namespace L0
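The common L0GfxCoreHelperHw implementation above returns false, so no platform defaults to aggregated mode in this commit. Because the helper is a per-family template, a later change could flip the default for a specific family by specializing the member; the sketch below is hypothetical (no such specialization exists in this diff) and uses a placeholder family type purely to show the shape such an override would take.

#include <cstdio>

namespace L0 {

// Minimal stand-in for the template declared in l0_gfx_core_helper.h.
template <typename Family>
struct L0GfxCoreHelperHw {
    bool bcsSplitAggregatedModeEnabled() const;
};

struct XeFamilyPlaceholder {}; // illustrative only, not a real gfx family in the driver

// A family that wants aggregated mode by default would specialize the member.
template <>
bool L0GfxCoreHelperHw<XeFamilyPlaceholder>::bcsSplitAggregatedModeEnabled() const {
    return true;
}

} // namespace L0

int main() {
    L0::L0GfxCoreHelperHw<L0::XeFamilyPlaceholder> helper;
    std::printf("aggregated mode default: %d\n", static_cast<int>(helper.bcsSplitAggregatedModeEnabled()));
    return 0;
}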
@@ -16,6 +16,7 @@
 #include "shared/test/common/test_macros/hw_test.h"
 
 #include "level_zero/core/source/event/event.h"
+#include "level_zero/core/source/gfx_core_helpers/l0_gfx_core_helper.h"
 #include "level_zero/core/source/image/image_hw.h"
 #include "level_zero/core/test/unit_tests/fixtures/cmdlist_fixture.h"
 #include "level_zero/core/test/unit_tests/fixtures/device_fixture.h"
@@ -1086,6 +1087,18 @@ struct AggregatedBcsSplitTests : public ::testing::Test {
     uint32_t expectedNumRootDevices = 1;
 };
 
+HWTEST2_F(AggregatedBcsSplitTests, givenPlatformSupporingAggregatedSplitModeWhenInitializingThenEnableInBcsSplitObject, IsAtLeastXeHpcCore) {
+    debugManager.flags.SplitBcsAggregatedEventsMode.set(-1);
+
+    BcsSplit bcsSplit(static_cast<L0::DeviceImp &>(*device));
+
+    bcsSplit.setupDevice(device->getHwInfo().platform.eProductFamily, false, nullptr, cmdList->getCsr(false));
+
+    EXPECT_EQ(device->getL0GfxCoreHelper().bcsSplitAggregatedModeEnabled(), bcsSplit.events.aggregatedEventsMode);
+
+    bcsSplit.releaseResources();
+}
+
 HWTEST2_F(AggregatedBcsSplitTests, whenObtainCalledThenAggregatedEventsCreated, IsAtLeastXeHpcCore) {
     EXPECT_EQ(0u, bcsSplit->events.subcopy.size());
     EXPECT_TRUE(bcsSplit->events.aggregatedEventsMode);
@@ -3971,10 +3971,20 @@ void BcsSplitInOrderCmdListTests::verifySplitCmds(LinearStream &cmdStream, size_
     auto itor = cmdList.begin();
 
+    bool aggregatedEventSplit = bcsSplit.events.aggregatedEventsMode;
+
     for (uint32_t i = 0; i < numLinkCopyEngines; i++) {
         auto beginItor = itor;
 
-        auto signalSubCopyEventGpuVa = bcsSplit.events.subcopy[i + (submissionId * numLinkCopyEngines)]->getCompletionFieldGpuAddress(device);
+        auto engineOffset = aggregatedEventSplit ? submissionId : (submissionId * numLinkCopyEngines);
+
+        uint64_t signalSubCopyEventGpuVa = 0;
+
+        if (aggregatedEventSplit) {
+            signalSubCopyEventGpuVa = bcsSplit.events.subcopy[engineOffset]->getInOrderExecInfo()->getBaseDeviceAddress();
+        } else {
+            signalSubCopyEventGpuVa = bcsSplit.events.subcopy[i + engineOffset]->getCompletionFieldGpuAddress(device);
+        }
 
         size_t numExpectedSemaphores = 0;
@@ -4005,20 +4015,38 @@ void BcsSplitInOrderCmdListTests::verifySplitCmds(LinearStream &cmdStream, size_
         ASSERT_NE(nullptr, genCmdCast<XY_COPY_BLT *>(*itor));
 
         if (!device->getProductHelper().useAdditionalBlitProperties()) {
-            auto flushDwItor = find<MI_FLUSH_DW *>(++itor, cmdList.end());
-            ASSERT_NE(cmdList.end(), flushDwItor);
-
-            auto signalSubCopyEvent = genCmdCast<MI_FLUSH_DW *>(*flushDwItor);
-            ASSERT_NE(nullptr, signalSubCopyEvent);
-
-            while (signalSubCopyEvent->getDestinationAddress() != signalSubCopyEventGpuVa) {
-                flushDwItor = find<MI_FLUSH_DW *>(++flushDwItor, cmdList.end());
-                ASSERT_NE(cmdList.end(), flushDwItor);
-
-                signalSubCopyEvent = genCmdCast<MI_FLUSH_DW *>(*flushDwItor);
-                ASSERT_NE(nullptr, signalSubCopyEvent);
-            }
-            itor = ++flushDwItor;
+            GenCmdList::iterator signalItor;
+
+            if (aggregatedEventSplit) {
+                signalItor = find<MI_ATOMIC *>(++itor, cmdList.end());
+
+                auto signalSubCopyEvent = genCmdCast<MI_ATOMIC *>(*signalItor);
+                ASSERT_NE(nullptr, signalSubCopyEvent);
+
+                while (signalSubCopyEvent->getMemoryAddress() != signalSubCopyEventGpuVa) {
+                    signalItor = find<MI_ATOMIC *>(++signalItor, cmdList.end());
+                    ASSERT_NE(cmdList.end(), signalItor);
+
+                    signalSubCopyEvent = genCmdCast<MI_ATOMIC *>(*signalItor);
+                    ASSERT_NE(nullptr, signalSubCopyEvent);
+                }
+            } else {
+                signalItor = find<MI_FLUSH_DW *>(++itor, cmdList.end());
+                ASSERT_NE(cmdList.end(), signalItor);
+
+                auto signalSubCopyEvent = genCmdCast<MI_FLUSH_DW *>(*signalItor);
+                ASSERT_NE(nullptr, signalSubCopyEvent);
+
+                while (signalSubCopyEvent->getDestinationAddress() != signalSubCopyEventGpuVa) {
+                    signalItor = find<MI_FLUSH_DW *>(++signalItor, cmdList.end());
+                    ASSERT_NE(cmdList.end(), signalItor);
+
+                    signalSubCopyEvent = genCmdCast<MI_FLUSH_DW *>(*signalItor);
+                    ASSERT_NE(nullptr, signalSubCopyEvent);
+                }
+            }
+
+            itor = ++signalItor;
         } else {
             ASSERT_TRUE(false);
         }
@@ -4038,18 +4066,39 @@ void BcsSplitInOrderCmdListTests::verifySplitCmds(LinearStream &cmdStream, size_
         ASSERT_TRUE(verifyInOrderDependency<FamilyType>(semaphoreItor, submissionId, counterGpuAddress, immCmdList.isQwordInOrderCounter(), true));
     }
 
-    for (uint32_t i = 0; i < numLinkCopyEngines; i++) {
-        auto subCopyEventSemaphore = genCmdCast<MI_SEMAPHORE_WAIT *>(*semaphoreItor);
-        ASSERT_NE(nullptr, subCopyEventSemaphore);
-
-        EXPECT_EQ(bcsSplit.events.subcopy[i + (submissionId * numLinkCopyEngines)]->getCompletionFieldGpuAddress(device), subCopyEventSemaphore->getSemaphoreGraphicsAddress());
-
-        itor = ++semaphoreItor;
-    }
-
-    ASSERT_NE(nullptr, genCmdCast<MI_FLUSH_DW *>(*itor)); // marker event
-
-    if (immCmdList.isHeaplessModeEnabled()) {
+    if (aggregatedEventSplit) {
+        semaphoreItor = find<MI_SEMAPHORE_WAIT *>(semaphoreItor, cmdList.end());
+        ASSERT_NE(cmdList.end(), semaphoreItor);
+
+        auto subCopyEventSemaphore = genCmdCast<MI_SEMAPHORE_WAIT *>(*semaphoreItor);
+        ASSERT_NE(nullptr, subCopyEventSemaphore);
+
+        while (bcsSplit.events.subcopy[submissionId]->getInOrderExecInfo()->getBaseDeviceAddress() != subCopyEventSemaphore->getSemaphoreGraphicsAddress()) {
+            semaphoreItor = find<MI_SEMAPHORE_WAIT *>(++semaphoreItor, cmdList.end());
+            ASSERT_NE(cmdList.end(), semaphoreItor);
+
+            auto subCopyEventSemaphore = genCmdCast<MI_SEMAPHORE_WAIT *>(*semaphoreItor);
+            ASSERT_NE(nullptr, subCopyEventSemaphore);
+        }
+
+        itor = ++semaphoreItor;
+
+        EXPECT_EQ(nullptr, genCmdCast<MI_FLUSH_DW *>(*itor)); // no marker event
+
+    } else {
+        for (uint32_t i = 0; i < numLinkCopyEngines; i++) {
+            auto subCopyEventSemaphore = genCmdCast<MI_SEMAPHORE_WAIT *>(*semaphoreItor);
+            ASSERT_NE(nullptr, subCopyEventSemaphore);
+
+            EXPECT_EQ(bcsSplit.events.subcopy[i + (submissionId * numLinkCopyEngines)]->getCompletionFieldGpuAddress(device), subCopyEventSemaphore->getSemaphoreGraphicsAddress());
+
+            itor = ++semaphoreItor;
+        }
+
+        ASSERT_NE(nullptr, genCmdCast<MI_FLUSH_DW *>(*itor)); // marker event
+    }
+
+    if (immCmdList.isHeaplessModeEnabled() && !aggregatedEventSplit) {
         auto inOrderAtomicSignalling = genCmdCast<MI_ATOMIC *>(*(++itor));
         ASSERT_NE(nullptr, inOrderAtomicSignalling);
     }
@@ -4130,10 +4179,10 @@ HWTEST2_F(BcsSplitInOrderCmdListTests, givenBcsSplitEnabledWhenDispatchingCopyTh
         EXPECT_FALSE(event->isCounterBased());
     }
     for (auto &event : bcsSplit.events.subcopy) {
-        EXPECT_FALSE(event->isCounterBased());
+        EXPECT_EQ(bcsSplit.events.aggregatedEventsMode, event->isCounterBased());
     }
     for (auto &event : bcsSplit.events.marker) {
-        EXPECT_FALSE(event->isCounterBased());
+        EXPECT_EQ(bcsSplit.events.aggregatedEventsMode, event->isCounterBased());
     }
 }
@@ -4173,6 +4222,7 @@ HWTEST2_F(BcsSplitInOrderCmdListTests, givenBcsSplitEnabledWhenAppendingMemoryCo
 
     *immCmdList->getCsr(false)->getBarrierCountTagAddress() = 0u;
     immCmdList->getCsr(false)->getNextBarrierCount();
+    *immCmdList->inOrderExecInfo->getBaseHostAddress() = 0;
     immCmdList->appendMemoryCopy(&copyData, &copyData, copySize, nullptr, 0, nullptr, copyParams);
 
     // implicit dependencies
@@ -4195,6 +4245,8 @@ HWTEST2_F(BcsSplitInOrderCmdListTests, givenBcsSplitEnabledWhenAppendingMemoryCo
 
     size_t offset = cmdStream->getUsed();
 
+    *immCmdList->inOrderExecInfo->getBaseHostAddress() = 0;
+
     immCmdList->appendMemoryCopy(&copyData, &copyData, copySize, nullptr, 1, &eventHandle, copyParams);
 
     verifySplitCmds<FamilyType, FamilyType::gfxCoreFamily>(*cmdStream, offset, device, 1, *immCmdList, events[0]->getCompletionFieldGpuAddress(device));