performance: enable cmd buffer preallocate xehp+

For L0 immediate cmdlists try to use reusable allocations if available.

Related-To: NEO-10526

Signed-off-by: Dominik Dabek <dominik.dabek@intel.com>
This commit is contained in:
Dominik Dabek
2024-03-06 12:40:48 +00:00
committed by Compute-Runtime-Automation
parent f0281202bf
commit 033ff28609
8 changed files with 34 additions and 29 deletions

View File

@@ -480,10 +480,6 @@ void CommandContainer::setCmdBuffer(GraphicsAllocation *cmdBuffer) {
}
}
// Convenience overload: forwards to allocateCommandBuffer(bool forceHostMemory)
// with forceHostMemory set to false.
GraphicsAllocation *CommandContainer::allocateCommandBuffer() {
    constexpr bool forceHostMemory = false;
    return allocateCommandBuffer(forceHostMemory);
}
GraphicsAllocation *CommandContainer::allocateCommandBuffer(bool forceHostMemory) {
size_t alignedSize = getAlignedCmdBufferSize();
AllocationProperties properties{device->getRootDeviceIndex(),
@@ -517,12 +513,12 @@ void CommandContainer::fillReusableAllocationLists() {
}
for (auto i = 0u; i < amountToFill; i++) {
auto allocToReuse = this->allocateCommandBuffer();
auto allocToReuse = obtainNextCommandBufferAllocation();
this->immediateReusableAllocationList->pushTailOne(*allocToReuse);
this->getResidencyContainer().push_back(allocToReuse);
if (this->useSecondaryCommandStream) {
auto hostAllocToReuse = this->allocateCommandBuffer(true);
auto hostAllocToReuse = obtainNextCommandBufferAllocation(true);
this->immediateReusableAllocationList->pushTailOne(*hostAllocToReuse);
this->getResidencyContainer().push_back(hostAllocToReuse);
}

View File

@@ -149,7 +149,6 @@ class CommandContainer : public NonCopyableOrMovableClass {
GraphicsAllocation *reuseExistingCmdBuffer();
GraphicsAllocation *reuseExistingCmdBuffer(bool forceHostMemory);
GraphicsAllocation *allocateCommandBuffer();
MOCKABLE_VIRTUAL GraphicsAllocation *allocateCommandBuffer(bool forceHostMemory);
void setCmdBuffer(GraphicsAllocation *cmdBuffer);
void addCurrentCommandBufferToReusableAllocationList();

View File

@@ -681,14 +681,6 @@ bool GfxCoreHelperHw<GfxFamily>::copyThroughLockedPtrEnabled(const HardwareInfo
return false;
}
// Returns the amount of allocations to fill.
// The SetAmountOfReusableAllocations debug flag wins whenever it holds anything
// other than -1; with the flag at -1 this implementation reports 0.
template <typename GfxFamily>
uint32_t GfxCoreHelperHw<GfxFamily>::getAmountOfAllocationsToFill() const {
    const auto overrideAmount = debugManager.flags.SetAmountOfReusableAllocations.get();
    if (overrideAmount == -1) {
        return 0u;
    }
    // Same conversion the implicit return performed, spelled out.
    return static_cast<uint32_t>(overrideAmount);
}
template <typename GfxFamily>
bool GfxCoreHelperHw<GfxFamily>::isChipsetUniqueUUIDSupported() const {
return false;

View File

@@ -53,6 +53,14 @@ bool GfxCoreHelperHw<GfxFamily>::isUpdateTaskCountFromWaitSupported() const {
return false;
}
// Returns the amount of allocations to fill.
// The SetAmountOfReusableAllocations debug flag wins whenever it holds anything
// other than -1; with the flag at -1 this implementation reports 0.
template <typename GfxFamily>
uint32_t GfxCoreHelperHw<GfxFamily>::getAmountOfAllocationsToFill() const {
    const auto overrideAmount = debugManager.flags.SetAmountOfReusableAllocations.get();
    if (overrideAmount == -1) {
        return 0u;
    }
    // Same conversion the implicit return performed, spelled out.
    return static_cast<uint32_t>(overrideAmount);
}
template <typename GfxFamily>
bool GfxCoreHelperHw<GfxFamily>::makeResidentBeforeLockNeeded(bool precondition) const {
return precondition;

View File

@@ -58,6 +58,14 @@ bool GfxCoreHelperHw<GfxFamily>::isTimestampWaitSupportedForQueues() const {
return true;
}
// Returns the amount of allocations to fill.
// The SetAmountOfReusableAllocations debug flag wins whenever it holds anything
// other than -1; with the flag at -1 this implementation reports 1.
template <typename GfxFamily>
uint32_t GfxCoreHelperHw<GfxFamily>::getAmountOfAllocationsToFill() const {
    const auto overrideAmount = debugManager.flags.SetAmountOfReusableAllocations.get();
    if (overrideAmount == -1) {
        return 1u;
    }
    // Same conversion the implicit return performed, spelled out.
    return static_cast<uint32_t>(overrideAmount);
}
template <typename GfxFamily>
bool GfxCoreHelperHw<GfxFamily>::makeResidentBeforeLockNeeded(bool precondition) const {
return true;

View File

@@ -399,14 +399,6 @@ bool GfxCoreHelperHw<Family>::copyThroughLockedPtrEnabled(const HardwareInfo &hw
return true;
}
// Family-specific specialization: returns the amount of allocations to fill.
// The SetAmountOfReusableAllocations debug flag wins whenever it holds anything
// other than -1; with the flag at -1 this specialization reports 1.
template <>
uint32_t GfxCoreHelperHw<Family>::getAmountOfAllocationsToFill() const {
    const auto overrideAmount = debugManager.flags.SetAmountOfReusableAllocations.get();
    if (overrideAmount == -1) {
        return 1u;
    }
    // Same conversion the implicit return performed, spelled out.
    return static_cast<uint32_t>(overrideAmount);
}
template <>
bool GfxCoreHelperHw<Family>::isRelaxedOrderingSupported() const {
return true;

View File

@@ -1347,11 +1347,6 @@ HWTEST2_F(GfxCoreHelperTest, givenGfxCoreHelperWhenCallCopyThroughLockedPtrEnabl
EXPECT_FALSE(gfxCoreHelper.copyThroughLockedPtrEnabled(*defaultHwInfo, productHelper));
}
// Expects getAmountOfAllocationsToFill() to report 0 for cores selected by the
// IsNotXeHpcCore matcher.
// NOTE(review): the test name ends in "ThenReturnFalse" although the assertion
// compares the result against 0u, not a boolean.
HWTEST2_F(GfxCoreHelperTest, givenGfxCoreHelperWhenCallGetAmountOfAllocationsToFillThenReturnFalse, IsNotXeHpcCore) {
const auto &gfxCoreHelper = getHelper<GfxCoreHelper>();
EXPECT_EQ(gfxCoreHelper.getAmountOfAllocationsToFill(), 0u);
}
HWTEST_F(GfxCoreHelperTest, givenGfxCoreHelperWhenFlagSetAndCallCopyThroughLockedPtrEnabledThenReturnCorrectValue) {
DebugManagerStateRestore restorer;
const auto &gfxCoreHelper = getHelper<GfxCoreHelper>();
@@ -1363,9 +1358,11 @@ HWTEST_F(GfxCoreHelperTest, givenGfxCoreHelperWhenFlagSetAndCallCopyThroughLocke
EXPECT_TRUE(gfxCoreHelper.copyThroughLockedPtrEnabled(*defaultHwInfo, productHelper));
}
HWTEST_F(GfxCoreHelperTest, givenGfxCoreHelperWhenFlagSetAndCallGetAmountOfAllocationsToFillThenReturnCorrectValue) {
HWTEST2_F(GfxCoreHelperTest, givenGfxCoreHelperWhenFlagSetAndCallGetAmountOfAllocationsToFillThenReturnCorrectValue, IsBeforeXeHpCore) {
DebugManagerStateRestore restorer;
const auto &gfxCoreHelper = getHelper<GfxCoreHelper>();
EXPECT_EQ(gfxCoreHelper.getAmountOfAllocationsToFill(), 0u);
debugManager.flags.SetAmountOfReusableAllocations.set(0);
EXPECT_EQ(gfxCoreHelper.getAmountOfAllocationsToFill(), 0u);

View File

@@ -232,6 +232,19 @@ HWTEST2_F(GfxCoreHelperDg2AndLaterTest, givenGfxCoreHelperWhenCheckMakeResidentB
EXPECT_TRUE(gfxCoreHelper.makeResidentBeforeLockNeeded(false));
}
// Checks getAmountOfAllocationsToFill() for cores selected by IsAtLeastXeHpgCore:
// first the value reported before the test touches the debug flag, then the
// value reported after SetAmountOfReusableAllocations is set explicitly.
HWTEST2_F(GfxCoreHelperDg2AndLaterTest, givenGfxCoreHelperWhenFlagSetAndCallGetAmountOfAllocationsToFillThenReturnCorrectValue, IsAtLeastXeHpgCore) {
// presumably restores the debug flags when the test scope ends — confirm against DebugManagerStateRestore
DebugManagerStateRestore restorer;
MockExecutionEnvironment mockExecutionEnvironment{};
auto &gfxCoreHelper = mockExecutionEnvironment.rootDeviceEnvironments[0]->getHelper<GfxCoreHelper>();
// Before the flag is set by this test, one allocation is expected.
EXPECT_EQ(gfxCoreHelper.getAmountOfAllocationsToFill(), 1u);
// An explicit flag value replaces that result, including a value of 0.
debugManager.flags.SetAmountOfReusableAllocations.set(0);
EXPECT_EQ(gfxCoreHelper.getAmountOfAllocationsToFill(), 0u);
debugManager.flags.SetAmountOfReusableAllocations.set(1);
EXPECT_EQ(gfxCoreHelper.getAmountOfAllocationsToFill(), 1u);
}
using ProductHelperTestDg2AndLater = ::testing::Test;
HWTEST2_F(ProductHelperTestDg2AndLater, givenDg2AndLaterPlatformWhenAskedIfHeapInLocalMemThenTrueIsReturned, IsAtLeastXeHpgCore) {