performance: enable staging write for cl buffers

Related-To: NEO-13529

Also, add size threshold on iGPU on Linux,
and disable staging if imported host ptr could
be reused

Signed-off-by: Szymon Morek <szymon.morek@intel.com>
This commit is contained in:
Szymon Morek
2025-02-04 10:24:31 +00:00
committed by Compute-Runtime-Automation
parent 35d8e82664
commit b11322332c
14 changed files with 174 additions and 22 deletions

View File

@@ -94,4 +94,15 @@ TEST(OsInterfaceTest, whenOsInterfaceSetupGmmInputArgsThenArgsAreSet) {
EXPECT_EQ(GMM_CLIENT::GMM_OCL_VISTA, passedInputArgs.ClientType);
}
TEST(OsInterfaceTest, GivenLinuxOsInterfaceWhenGetThresholdForStagingCalledThenReturnThresholdForIntegratedDevices) {
OSInterface osInterface;
auto executionEnvironment = std::make_unique<MockExecutionEnvironment>();
DrmMock *drm = new DrmMock(*executionEnvironment->rootDeviceEnvironments[0]);
osInterface.setDriverModel(std::unique_ptr<DriverModel>(drm));
EXPECT_TRUE(osInterface.isSizeWithinThresholdForStaging(MemoryConstants::gigaByte, false));
EXPECT_FALSE(osInterface.isSizeWithinThresholdForStaging(MemoryConstants::gigaByte, true));
}
} // namespace NEO

View File

@@ -153,3 +153,14 @@ TEST_F(OsInterfaceTest, givenEnableFtrTile64OptimizationDebugKeyWhenSetThenPrope
EXPECT_EQ(1u, passedFtrTable.FtrTile64Optimization);
}
}
TEST_F(OsInterfaceTest, whenGetThresholdForStagingCalledThenReturnNoThreshold) {
MockExecutionEnvironment executionEnvironment;
auto &rootDeviceEnvironment = *executionEnvironment.rootDeviceEnvironments[0];
auto wddm = new WddmMock(rootDeviceEnvironment);
EXPECT_EQ(nullptr, rootDeviceEnvironment.osInterface.get());
wddm->init();
EXPECT_NE(nullptr, rootDeviceEnvironment.osInterface.get());
EXPECT_TRUE(rootDeviceEnvironment.osInterface->isSizeWithinThresholdForStaging(MemoryConstants::gigaByte, false));
EXPECT_TRUE(rootDeviceEnvironment.osInterface->isSizeWithinThresholdForStaging(MemoryConstants::gigaByte, true));
}

View File

@@ -5,6 +5,7 @@
*
*/
#include "shared/source/os_interface/os_interface.h"
#include "shared/source/utilities/staging_buffer_manager.h"
#include "shared/test/common/fixtures/device_fixture.h"
#include "shared/test/common/helpers/debug_manager_state_restore.h"
@@ -20,6 +21,13 @@
using namespace NEO;
struct MockOSIface : OSInterface {
bool isSizeWithinThresholdForStaging(size_t size, bool isIGPU) const override {
return isSizeWithinThresholdValue;
}
bool isSizeWithinThresholdValue = true;
};
class StagingBufferManagerFixture : public DeviceFixture {
public:
void setUp() {
@@ -31,6 +39,8 @@ class StagingBufferManagerFixture : public DeviceFixture {
std::map<uint32_t, DeviceBitfield> deviceBitfields{{mockRootDeviceIndex, mockDeviceBitfield}};
this->stagingBufferManager = std::make_unique<StagingBufferManager>(svmAllocsManager.get(), rootDeviceIndices, deviceBitfields, false);
this->csr = pDevice->commandStreamReceivers[0].get();
this->osInterface = new MockOSIface{};
this->pDevice->getRootDeviceEnvironmentRef().osInterface.reset(this->osInterface);
}
void tearDown() {
@@ -165,6 +175,7 @@ class StagingBufferManagerFixture : public DeviceFixture {
std::unique_ptr<MockSVMAllocsManager> svmAllocsManager;
std::unique_ptr<StagingBufferManager> stagingBufferManager;
CommandStreamReceiver *csr;
MockOSIface *osInterface;
};
using StagingBufferManagerTest = Test<StagingBufferManagerFixture>;
@@ -199,18 +210,31 @@ TEST_F(StagingBufferManagerTest, givenStagingBufferEnabledWhenValidForCopyThenRe
}
auto actualValid = stagingBufferManager->isValidForCopy(*pDevice, copyParamsStruct[i].dstPtr, copyParamsStruct[i].srcPtr, copyParamsStruct[i].size, copyParamsStruct[i].hasDependencies, 0u);
EXPECT_EQ(actualValid, copyParamsStruct[i].expectValid);
stagingBufferManager->resetDetectedPtrs();
}
debugManager.flags.EnableCopyWithStagingBuffers.set(0);
EXPECT_FALSE(stagingBufferManager->isValidForCopy(*pDevice, usmBuffer, nonUsmBuffer, bufferSize, false, 0u));
stagingBufferManager->resetDetectedPtrs();
debugManager.flags.EnableCopyWithStagingBuffers.set(-1);
auto isStaingBuffersEnabled = pDevice->getProductHelper().isStagingBuffersEnabled();
EXPECT_EQ(isStaingBuffersEnabled, stagingBufferManager->isValidForCopy(*pDevice, usmBuffer, nonUsmBuffer, bufferSize, false, 0u));
auto isStagingBuffersEnabled = pDevice->getProductHelper().isStagingBuffersEnabled();
EXPECT_EQ(isStagingBuffersEnabled, stagingBufferManager->isValidForCopy(*pDevice, usmBuffer, nonUsmBuffer, bufferSize, false, 0u));
stagingBufferManager->registerHostPtr(nonUsmBuffer);
EXPECT_FALSE(stagingBufferManager->isValidForCopy(*pDevice, usmBuffer, nonUsmBuffer, bufferSize, false, 0u));
stagingBufferManager->resetDetectedPtrs();
this->osInterface->isSizeWithinThresholdValue = false;
EXPECT_FALSE(stagingBufferManager->isValidForCopy(*pDevice, usmBuffer, nonUsmBuffer, bufferSize, false, 0u));
stagingBufferManager->resetDetectedPtrs();
this->pDevice->getRootDeviceEnvironmentRef().osInterface.reset(nullptr);
EXPECT_EQ(isStagingBuffersEnabled, stagingBufferManager->isValidForCopy(*pDevice, usmBuffer, nonUsmBuffer, bufferSize, false, 0u));
svmAllocsManager->freeSVMAlloc(usmBuffer);
}
TEST_F(StagingBufferManagerTest, givenStagingBufferEnabledWhenValidForImageWriteThenReturnCorrectValue) {
TEST_F(StagingBufferManagerTest, givenStagingBufferEnabledWhenValidForStagingTransferThenReturnCorrectValue) {
constexpr size_t bufferSize = 1024;
auto usmBuffer = allocateDeviceBuffer(bufferSize);
unsigned char nonUsmBuffer[bufferSize];
@@ -226,16 +250,23 @@ TEST_F(StagingBufferManagerTest, givenStagingBufferEnabledWhenValidForImageWrite
{nonUsmBuffer, true, false},
};
for (auto i = 0; i < 4; i++) {
auto actualValid = stagingBufferManager->isValidForStagingTransfer(*pDevice, copyParamsStruct[i].ptr, copyParamsStruct[i].hasDependencies);
auto actualValid = stagingBufferManager->isValidForStagingTransfer(*pDevice, copyParamsStruct[i].ptr, bufferSize, copyParamsStruct[i].hasDependencies);
EXPECT_EQ(actualValid, copyParamsStruct[i].expectValid);
}
debugManager.flags.EnableCopyWithStagingBuffers.set(0);
EXPECT_FALSE(stagingBufferManager->isValidForStagingTransfer(*pDevice, nonUsmBuffer, false));
EXPECT_FALSE(stagingBufferManager->isValidForStagingTransfer(*pDevice, nonUsmBuffer, bufferSize, false));
debugManager.flags.EnableCopyWithStagingBuffers.set(-1);
auto isStaingBuffersEnabled = pDevice->getProductHelper().isStagingBuffersEnabled();
EXPECT_EQ(isStaingBuffersEnabled, stagingBufferManager->isValidForStagingTransfer(*pDevice, nonUsmBuffer, false));
stagingBufferManager->resetDetectedPtrs();
EXPECT_EQ(isStaingBuffersEnabled, stagingBufferManager->isValidForStagingTransfer(*pDevice, nonUsmBuffer, bufferSize, false));
EXPECT_FALSE(stagingBufferManager->isValidForStagingTransfer(*pDevice, usmBuffer, bufferSize, false));
stagingBufferManager->registerHostPtr(nonUsmBuffer);
EXPECT_FALSE(stagingBufferManager->isValidForStagingTransfer(*pDevice, nonUsmBuffer, bufferSize, false));
svmAllocsManager->freeSVMAlloc(usmBuffer);
}