Flush tlb only when new resource is bound

Signed-off-by: Lukasz Jobczyk <lukasz.jobczyk@intel.com>
This commit is contained in:
Lukasz Jobczyk
2021-04-14 14:40:23 +00:00
committed by Compute-Runtime-Automation
parent 46c51cb8a9
commit 83a1a52bdc
13 changed files with 255 additions and 14 deletions

View File

@ -89,6 +89,7 @@ DirectSubmissionSemaphoreAddressing = -1
DirectSubmissionDisableCpuCacheFlush = -1
DirectSubmissionEnableDebugBuffer = 0
DirectSubmissionDiagnosticExecutionCount = 30
DirectSubmissionNewResourceTlbFlush = -1
DirectSubmissionDisableCacheFlush = -1
DirectSubmissionDisableMonitorFence = 0
USMEvictAfterMigration = 1

View File

@ -163,6 +163,7 @@ DECLARE_DEBUG_VARIABLE(int32_t, DirectSubmissionOverrideBlitterSupport, -1, "Ove
DECLARE_DEBUG_VARIABLE(int32_t, DirectSubmissionOverrideRenderSupport, -1, "Overrides default render support: -1: do not override, 0: disable engine support, 1: enable engine support with init start, 2: enable engine support without init start")
DECLARE_DEBUG_VARIABLE(int32_t, DirectSubmissionOverrideComputeSupport, -1, "Overrides default compute support: -1: do not override, 0: disable engine support, 1: enable engine support with init start, 2: enable engine support without init start")
DECLARE_DEBUG_VARIABLE(int32_t, DirectSubmissionDisableCacheFlush, -1, "-1: driver default, 0: additional cache flush is present 1: disable dispatching cache flush commands")
DECLARE_DEBUG_VARIABLE(int32_t, DirectSubmissionNewResourceTlbFlush, -1, "-1: driver default - flush when new resource is bound, 0: disabled, 1: enabled")
DECLARE_DEBUG_VARIABLE(bool, USMEvictAfterMigration, true, "Evict USM allocation after implicit migration to GPU")
DECLARE_DEBUG_VARIABLE(bool, DirectSubmissionDisableMonitorFence, false, "Disable dispatching monitor fence commands")

View File

@ -37,7 +37,7 @@ struct TagData {
};
namespace UllsDefaults {
constexpr bool defaultDisableCacheFlush = false;
constexpr bool defaultDisableCacheFlush = true;
constexpr bool defaultDisableMonitorFence = false;
} // namespace UllsDefaults
@ -76,6 +76,8 @@ class DirectSubmissionHw {
virtual bool allocateOsResources() = 0;
virtual bool submit(uint64_t gpuAddress, size_t size) = 0;
virtual bool handleResidency() = 0;
// Hook called from dispatchCommandBuffer() ahead of the workload section; the base implementation is empty.
virtual void handleNewResourcesSubmission();
// Ring space accounted for the hook above inside getSizeDispatch(); the base implementation returns 0.
virtual size_t getSizeNewResourceHandler();
virtual uint64_t switchRingBuffers();
virtual void handleSwitchRingBuffers() = 0;
GraphicsAllocation *switchRingBuffersAllocations();

View File

@ -1,5 +1,5 @@
/*
* Copyright (C) 2020 Intel Corporation
* Copyright (C) 2020-2021 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@ -261,6 +261,8 @@ inline size_t DirectSubmissionHw<GfxFamily, Dispatcher>::getSizeDispatch() {
size += Dispatcher::getSizeMonitorFence(*hwInfo);
}
size += getSizeNewResourceHandler();
return size;
}
@ -329,6 +331,8 @@ bool DirectSubmissionHw<GfxFamily, Dispatcher>::dispatchCommandBuffer(BatchBuffe
buffersSwitched = true;
}
handleNewResourcesSubmission();
void *currentPosition = dispatchWorkloadSection(batchBuffer);
if (ringStart) {
@ -376,6 +380,15 @@ inline size_t DirectSubmissionHw<GfxFamily, Dispatcher>::getSizeDisablePrefetche
return 0u;
}
// Default new-resource hook: emits nothing into the ring.
// dispatchCommandBuffer() calls it right before the workload section is dispatched;
// OS-specific subclasses may override it.
template <typename GfxFamily, typename Dispatcher>
inline void DirectSubmissionHw<GfxFamily, Dispatcher>::handleNewResourcesSubmission() {
}
// Size contribution of handleNewResourcesSubmission() to getSizeDispatch().
// The default hook emits no commands, so no ring space is reserved for it.
template <typename GfxFamily, typename Dispatcher>
inline size_t DirectSubmissionHw<GfxFamily, Dispatcher>::getSizeNewResourceHandler() {
return 0u;
}
template <typename GfxFamily, typename Dispatcher>
inline uint64_t DirectSubmissionHw<GfxFamily, Dispatcher>::switchRingBuffers() {
GraphicsAllocation *nextRingBuffer = switchRingBuffersAllocations();

View File

@ -1,5 +1,5 @@
/*
* Copyright (C) 2020 Intel Corporation
* Copyright (C) 2020-2021 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@ -23,6 +23,8 @@ class BlitterDispatcher : public Dispatcher<GfxFamily> {
static size_t getSizeMonitorFence(const HardwareInfo &hwInfo);
static void dispatchCacheFlush(LinearStream &cmdBuffer, const HardwareInfo &hwInfo);
// TLB-flush hook for the blitter engine; the implementation emits no commands into cmdBuffer.
static void dispatchTlbFlush(LinearStream &cmdBuffer);
static size_t getSizeCacheFlush(const HardwareInfo &hwInfo);
// Ring space consumed by dispatchTlbFlush(); the implementation returns 0.
static size_t getSizeTlbFlush();
};
} // namespace NEO

View File

@ -1,5 +1,5 @@
/*
* Copyright (C) 2020 Intel Corporation
* Copyright (C) 2020-2021 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@ -41,10 +41,19 @@ inline void BlitterDispatcher<GfxFamily>::dispatchCacheFlush(LinearStream &cmdBu
EncodeMiFlushDW<GfxFamily>::programMiFlushDw(cmdBuffer, 0ull, 0ull, false, false);
}
// TLB-flush hook for the blitter dispatcher. Nothing is written to cmdBuffer,
// which matches getSizeTlbFlush() reporting a size of 0.
template <typename GfxFamily>
inline void BlitterDispatcher<GfxFamily>::dispatchTlbFlush(LinearStream &cmdBuffer) {
}
// Ring space needed for the blitter cache flush: the size EncodeMiFlushDW reports for an
// MI_FLUSH_DW with data write. hwInfo is accepted for interface parity and is not read here.
template <typename GfxFamily>
inline size_t BlitterDispatcher<GfxFamily>::getSizeCacheFlush(const HardwareInfo &hwInfo) {
    return EncodeMiFlushDW<GfxFamily>::getMiFlushDwCmdSizeForDataWrite();
}
// Ring space consumed by BlitterDispatcher::dispatchTlbFlush(); that function emits nothing, hence 0.
template <typename GfxFamily>
inline size_t BlitterDispatcher<GfxFamily>::getSizeTlbFlush() {
return 0u;
}
} // namespace NEO

View File

@ -1,5 +1,5 @@
/*
* Copyright (C) 2020 Intel Corporation
* Copyright (C) 2020-2021 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@ -23,6 +23,8 @@ class RenderDispatcher : public Dispatcher<GfxFamily> {
static size_t getSizeMonitorFence(const HardwareInfo &hwInfo);
static void dispatchCacheFlush(LinearStream &cmdBuffer, const HardwareInfo &hwInfo);
// Adds a pipe control with TLB invalidation enabled to cmdBuffer.
static void dispatchTlbFlush(LinearStream &cmdBuffer);
static size_t getSizeCacheFlush(const HardwareInfo &hwInfo);
// Ring space consumed by dispatchTlbFlush() (size of a single pipe control).
static size_t getSizeTlbFlush();
};
} // namespace NEO

View File

@ -1,5 +1,5 @@
/*
* Copyright (C) 2020 Intel Corporation
* Copyright (C) 2020-2021 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@ -52,10 +52,23 @@ inline void RenderDispatcher<GfxFamily>::dispatchCacheFlush(LinearStream &cmdBuf
MemorySynchronizationCommands<GfxFamily>::addFullCacheFlush(cmdBuffer);
}
// Programs one pipe control into cmdBuffer with both the pipe-control flush and
// TLB invalidation bits requested.
template <typename GfxFamily>
inline void RenderDispatcher<GfxFamily>::dispatchTlbFlush(LinearStream &cmdBuffer) {
    PipeControlArgs tlbFlushArgs(false);
    tlbFlushArgs.pipeControlFlushEnable = true;
    tlbFlushArgs.tlbInvalidation = true;

    MemorySynchronizationCommands<GfxFamily>::addPipeControl(cmdBuffer, tlbFlushArgs);
}
// Ring space needed for the render cache flush, as reported by
// MemorySynchronizationCommands for a full cache flush. hwInfo is not read here.
template <typename GfxFamily>
inline size_t RenderDispatcher<GfxFamily>::getSizeCacheFlush(const HardwareInfo &hwInfo) {
    return MemorySynchronizationCommands<GfxFamily>::getSizeForFullCacheFlush();
}
// Ring space consumed by RenderDispatcher::dispatchTlbFlush(), which adds exactly one pipe control.
template <typename GfxFamily>
inline size_t RenderDispatcher<GfxFamily>::getSizeTlbFlush() {
return MemorySynchronizationCommands<GfxFamily>::getSizeForSinglePipeControl();
}
} // namespace NEO

View File

@ -1,5 +1,5 @@
/*
* Copyright (C) 2020 Intel Corporation
* Copyright (C) 2020-2021 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@ -26,6 +26,9 @@ class DrmDirectSubmission : public DirectSubmissionHw<GfxFamily, Dispatcher> {
bool submit(uint64_t gpuAddress, size_t size) override;
bool handleResidency() override;
// True when a TLB flush should be dispatched: follows Drm's new-resource-bound flag unless the
// DirectSubmissionNewResourceTlbFlush debug flag (!= -1) overrides it.
bool isNewResourceHandleNeeded();
// Dispatches the TLB flush when needed and, for non-BCS engines, clears Drm's new-resource-bound flag.
void handleNewResourcesSubmission() override;
// Ring space for the TLB flush when isNewResourceHandleNeeded() is true, otherwise 0.
size_t getSizeNewResourceHandler() override;
void handleSwitchRingBuffers() override;
uint64_t updateTagValue() override;
void getTagAddressValue(TagData &tagData) override;

View File

@ -89,6 +89,41 @@ bool DrmDirectSubmission<GfxFamily, Dispatcher>::handleResidency() {
return true;
}
// Decides whether the next submission needs a TLB flush.
// The DirectSubmissionNewResourceTlbFlush debug flag wins whenever it is not -1
// (0 -> never, non-zero -> always); otherwise the answer is Drm's new-resource-bound flag.
template <typename GfxFamily, typename Dispatcher>
bool DrmDirectSubmission<GfxFamily, Dispatcher>::isNewResourceHandleNeeded() {
    auto *linuxContext = static_cast<OsContextLinux *>(&this->osContext);
    const bool boundInDrm = linuxContext->getDrm().getNewResourceBound();

    const auto debugOverride = DebugManager.flags.DirectSubmissionNewResourceTlbFlush.get();
    if (debugOverride == -1) {
        return boundInDrm;
    }
    return debugOverride != 0;
}
// Emits the dispatcher's TLB flush into the ring when isNewResourceHandleNeeded() says so,
// then clears Drm's new-resource-bound flag. The flag is left untouched for BCS engines.
template <typename GfxFamily, typename Dispatcher>
void DrmDirectSubmission<GfxFamily, Dispatcher>::handleNewResourcesSubmission() {
    const bool flushRequired = isNewResourceHandleNeeded();
    if (flushRequired) {
        Dispatcher::dispatchTlbFlush(this->ringCommandStream);
    }

    auto *linuxContext = static_cast<OsContextLinux *>(&this->osContext);
    if (EngineHelpers::isBcs(linuxContext->getEngineType())) {
        // Blitter submissions do not consume the flag.
        return;
    }
    linuxContext->getDrm().setNewResourceBound(false);
}
// Ring space that handleNewResourcesSubmission() will use: the dispatcher's TLB-flush size
// when a flush is currently needed, otherwise nothing.
template <typename GfxFamily, typename Dispatcher>
size_t DrmDirectSubmission<GfxFamily, Dispatcher>::getSizeNewResourceHandler() {
    if (!isNewResourceHandleNeeded()) {
        return 0u;
    }
    const size_t tlbFlushSize = Dispatcher::getSizeTlbFlush();
    return tlbFlushSize;
}
template <typename GfxFamily, typename Dispatcher>
void DrmDirectSubmission<GfxFamily, Dispatcher>::handleSwitchRingBuffers() {
if (this->ringStart) {

View File

@ -174,6 +174,9 @@ class Drm {
uint64_t getNextFenceVal(uint32_t vmHandleId) { return ++fenceVal[vmHandleId]; }
uint64_t *getFenceAddr(uint32_t vmHandleId) { return &pagingFence[vmHandleId]; }
// Records whether a new resource has been bound since the flag was last cleared.
void setNewResourceBound(bool value) { this->newResourceBound = value; }
bool getNewResourceBound() { return this->newResourceBound; };
protected:
int getQueueSliceCount(drm_i915_gem_context_param_sseu *sseu);
bool translateTopologyInfo(const drm_i915_query_topology_info *queryTopologyInfo, int &sliceCount, int &subSliceCount, int &euCount);
@ -187,6 +190,7 @@ class Drm {
bool bindAvailable = false;
bool directSubmissionActive = false;
bool contextDebugSupported = false;
bool newResourceBound = false;
std::once_flag checkBindOnce;
std::unique_ptr<HwDeviceId> hwDeviceId;
int deviceId = 0;

View File

@ -903,7 +903,7 @@ HWTEST_F(DirectSubmissionTest,
MockDirectSubmissionHw<FamilyType, Dispatcher> directSubmission(*pDevice,
*osContext.get());
EXPECT_FALSE(UllsDefaults::defaultDisableCacheFlush);
EXPECT_TRUE(UllsDefaults::defaultDisableCacheFlush);
EXPECT_FALSE(UllsDefaults::defaultDisableMonitorFence);
EXPECT_TRUE(directSubmission.disableCacheFlush);
EXPECT_FALSE(directSubmission.disableMonitorFence);
@ -938,7 +938,7 @@ HWTEST_F(DirectSubmissionTest,
MockDirectSubmissionHw<FamilyType, Dispatcher> directSubmission(*pDevice,
*osContext.get());
EXPECT_FALSE(UllsDefaults::defaultDisableCacheFlush);
EXPECT_TRUE(UllsDefaults::defaultDisableCacheFlush);
EXPECT_FALSE(UllsDefaults::defaultDisableMonitorFence);
EXPECT_TRUE(directSubmission.disableCacheFlush);
EXPECT_FALSE(directSubmission.disableMonitorFence);
@ -973,7 +973,7 @@ HWTEST_F(DirectSubmissionTest,
NEO::IoFunctions::mockFcloseCalled = 0u;
MockDirectSubmissionHw<FamilyType, Dispatcher> directSubmission(*pDevice,
*osContext.get());
EXPECT_FALSE(UllsDefaults::defaultDisableCacheFlush);
EXPECT_TRUE(UllsDefaults::defaultDisableCacheFlush);
EXPECT_FALSE(UllsDefaults::defaultDisableMonitorFence);
EXPECT_TRUE(directSubmission.disableCacheFlush);
EXPECT_FALSE(directSubmission.disableMonitorFence);
@ -1014,7 +1014,7 @@ HWTEST_F(DirectSubmissionTest,
MockDirectSubmissionHw<FamilyType, Dispatcher> directSubmission(*pDevice,
*osContext.get());
uint32_t expectedSemaphoreValue = directSubmission.currentQueueWorkCount;
EXPECT_FALSE(UllsDefaults::defaultDisableCacheFlush);
EXPECT_TRUE(UllsDefaults::defaultDisableCacheFlush);
EXPECT_FALSE(UllsDefaults::defaultDisableMonitorFence);
EXPECT_TRUE(directSubmission.disableCacheFlush);
EXPECT_TRUE(directSubmission.disableMonitorFence);
@ -1039,7 +1039,7 @@ HWTEST_F(DirectSubmissionTest,
EXPECT_EQ(expectedSize, directSubmission.ringCommandStream.getUsed());
EXPECT_EQ(expectedSemaphoreValue, directSubmission.currentQueueWorkCount);
EXPECT_FALSE(directSubmission.disableCacheFlush);
EXPECT_TRUE(directSubmission.disableCacheFlush);
EXPECT_FALSE(directSubmission.disableMonitorFence);
EXPECT_EQ(0u, directSubmission.workloadMode);
EXPECT_EQ(nullptr, directSubmission.diagnostic.get());
@ -1101,7 +1101,7 @@ HWTEST_F(DirectSubmissionTest,
MockDirectSubmissionHw<FamilyType, Dispatcher> directSubmission(*pDevice,
*osContext.get());
uint32_t expectedSemaphoreValue = directSubmission.currentQueueWorkCount;
EXPECT_FALSE(UllsDefaults::defaultDisableCacheFlush);
EXPECT_TRUE(UllsDefaults::defaultDisableCacheFlush);
EXPECT_FALSE(UllsDefaults::defaultDisableMonitorFence);
EXPECT_TRUE(directSubmission.disableCacheFlush);
EXPECT_TRUE(directSubmission.disableMonitorFence);
@ -1127,7 +1127,7 @@ HWTEST_F(DirectSubmissionTest,
EXPECT_EQ(expectedSize, directSubmission.ringCommandStream.getUsed());
EXPECT_EQ(expectedSemaphoreValue, directSubmission.currentQueueWorkCount);
EXPECT_FALSE(directSubmission.disableCacheFlush);
EXPECT_TRUE(directSubmission.disableCacheFlush);
EXPECT_FALSE(directSubmission.disableMonitorFence);
EXPECT_EQ(0u, directSubmission.workloadMode);
EXPECT_EQ(nullptr, directSubmission.diagnostic.get());

View File

@ -8,6 +8,8 @@
#include "shared/source/direct_submission/dispatchers/render_dispatcher.h"
#include "shared/source/direct_submission/linux/drm_direct_submission.h"
#include "shared/source/os_interface/linux/os_context_linux.h"
#include "shared/test/common/cmd_parse/hw_parse.h"
#include "shared/test/common/helpers/debug_manager_state_restore.h"
#include "shared/test/common/helpers/ult_hw_config.h"
#include "shared/test/common/mocks/mock_device.h"
@ -46,8 +48,11 @@ struct MockDrmDirectSubmission : public DrmDirectSubmission<GfxFamily, Dispatche
using BaseClass::allocateResources;
using BaseClass::currentTagData;
using BaseClass::DrmDirectSubmission;
using BaseClass::getSizeNewResourceHandler;
using BaseClass::getTagAddressValue;
using BaseClass::handleNewResourcesSubmission;
using BaseClass::handleResidency;
using BaseClass::isNewResourceHandleNeeded;
using BaseClass::submit;
using BaseClass::switchRingBuffers;
using BaseClass::tagAddress;
@ -101,3 +106,154 @@ HWTEST_F(DrmDirectSubmissionTest, whenCheckForDirectSubmissionSupportThenProperV
auto &hwHelper = HwHelper::get(device->getHardwareInfo().platform.eRenderCoreFamily);
EXPECT_EQ(directSubmissionSupported, hwHelper.isDirectSubmissionSupported() && executionEnvironment.rootDeviceEnvironments[0]->osInterface->get()->getDrm()->isVmBindAvailable());
}
// With DirectSubmissionNewResourceTlbFlush forced to 1, the render direct submission must reserve
// space for, and emit, a PIPE_CONTROL with TLB invalidation. Because the debug flag overrides the
// Drm state, the reserved size stays sizeof(PIPE_CONTROL) after the handler has run.
HWTEST_F(DrmDirectSubmissionTest, givenDirectSubmissionNewResourceTlbFlushWhenDispatchCommandBufferThenTlbIsFlushed) {
    using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;
    using Dispatcher = RenderDispatcher<FamilyType>;

    DebugManagerStateRestore restorer;
    DebugManager.flags.DirectSubmissionNewResourceTlbFlush.set(1);

    MockDrmDirectSubmission<FamilyType, Dispatcher> directSubmission(*device.get(),
                                                                     *osContext.get());
    bool ret = directSubmission.allocateResources();
    EXPECT_TRUE(ret);

    EXPECT_EQ(directSubmission.getSizeNewResourceHandler(), sizeof(PIPE_CONTROL));

    directSubmission.handleNewResourcesSubmission();

    HardwareParse hwParse;
    hwParse.parsePipeControl = true;
    hwParse.parseCommands<FamilyType>(directSubmission.ringCommandStream, 0);
    hwParse.findHardwareCommands<FamilyType>();
    auto *pipeControl = hwParse.getCommand<PIPE_CONTROL>();
    // Fail the test instead of dereferencing a null pointer when no PIPE_CONTROL was emitted.
    ASSERT_NE(nullptr, pipeControl);
    EXPECT_TRUE(pipeControl->getTlbInvalidate());

    EXPECT_EQ(directSubmission.getSizeNewResourceHandler(), sizeof(PIPE_CONTROL));
}
// With the debug flag at its default (-1) and Drm reporting a newly bound resource, the handler
// must emit a TLB-invalidating PIPE_CONTROL and clear the Drm flag, after which no further
// ring space is requested.
HWTEST_F(DrmDirectSubmissionTest, givenNewResourceBoundhWhenDispatchCommandBufferThenTlbIsFlushed) {
    using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;
    using Dispatcher = RenderDispatcher<FamilyType>;

    DebugManagerStateRestore restorer;
    DebugManager.flags.DirectSubmissionNewResourceTlbFlush.set(-1);

    MockDrmDirectSubmission<FamilyType, Dispatcher> directSubmission(*device.get(),
                                                                     *osContext.get());
    bool ret = directSubmission.allocateResources();
    EXPECT_TRUE(ret);

    auto drm = static_cast<DrmMock *>(executionEnvironment.rootDeviceEnvironments[0]->osInterface->get()->getDrm());
    drm->setNewResourceBound(true);

    EXPECT_EQ(directSubmission.getSizeNewResourceHandler(), sizeof(PIPE_CONTROL));

    directSubmission.handleNewResourcesSubmission();

    HardwareParse hwParse;
    hwParse.parsePipeControl = true;
    hwParse.parseCommands<FamilyType>(directSubmission.ringCommandStream, 0);
    hwParse.findHardwareCommands<FamilyType>();
    auto *pipeControl = hwParse.getCommand<PIPE_CONTROL>();
    // Fail the test instead of dereferencing a null pointer when no PIPE_CONTROL was emitted.
    ASSERT_NE(nullptr, pipeControl);
    EXPECT_TRUE(pipeControl->getTlbInvalidate());

    EXPECT_FALSE(drm->getNewResourceBound());
    EXPECT_EQ(directSubmission.getSizeNewResourceHandler(), 0u);
}
// Default debug flag (-1) and no newly bound resource: the handler reserves no space,
// emits no PIPE_CONTROL, and the Drm flag stays cleared.
HWTEST_F(DrmDirectSubmissionTest, givennoNewResourceBoundhWhenDispatchCommandBufferThenTlbIsNotFlushed) {
    using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;
    using Dispatcher = RenderDispatcher<FamilyType>;

    DebugManagerStateRestore flagsRestore;
    DebugManager.flags.DirectSubmissionNewResourceTlbFlush.set(-1);

    MockDrmDirectSubmission<FamilyType, Dispatcher> submission(*device.get(), *osContext.get());
    EXPECT_TRUE(submission.allocateResources());

    auto drmMock = static_cast<DrmMock *>(executionEnvironment.rootDeviceEnvironments[0]->osInterface->get()->getDrm());
    drmMock->setNewResourceBound(false);
    EXPECT_EQ(submission.getSizeNewResourceHandler(), 0u);

    submission.handleNewResourcesSubmission();

    HardwareParse parser;
    parser.parsePipeControl = true;
    parser.parseCommands<FamilyType>(submission.ringCommandStream, 0);
    parser.findHardwareCommands<FamilyType>();

    auto *parsedPipeControl = parser.getCommand<PIPE_CONTROL>();
    EXPECT_EQ(parsedPipeControl, nullptr);

    EXPECT_FALSE(drmMock->getNewResourceBound());
    EXPECT_EQ(submission.getSizeNewResourceHandler(), 0u);
}
// Debug flag forced to 0 while Drm reports a newly bound resource: the override suppresses the
// flush (no space reserved, no PIPE_CONTROL), yet the Drm flag is still cleared by the handler.
HWTEST_F(DrmDirectSubmissionTest, givenDirectSubmissionNewResourceTlbFlusZeroAndNewResourceBoundhWhenDispatchCommandBufferThenTlbIsNotFlushed) {
    using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;
    using Dispatcher = RenderDispatcher<FamilyType>;

    DebugManagerStateRestore flagsRestore;
    DebugManager.flags.DirectSubmissionNewResourceTlbFlush.set(0);

    MockDrmDirectSubmission<FamilyType, Dispatcher> submission(*device.get(), *osContext.get());
    EXPECT_TRUE(submission.allocateResources());

    auto drmMock = static_cast<DrmMock *>(executionEnvironment.rootDeviceEnvironments[0]->osInterface->get()->getDrm());
    drmMock->setNewResourceBound(true);
    EXPECT_EQ(submission.getSizeNewResourceHandler(), 0u);

    submission.handleNewResourcesSubmission();

    HardwareParse parser;
    parser.parsePipeControl = true;
    parser.parseCommands<FamilyType>(submission.ringCommandStream, 0);
    parser.findHardwareCommands<FamilyType>();

    auto *parsedPipeControl = parser.getCommand<PIPE_CONTROL>();
    EXPECT_EQ(parsedPipeControl, nullptr);

    EXPECT_FALSE(drmMock->getNewResourceBound());
    EXPECT_EQ(submission.getSizeNewResourceHandler(), 0u);
}
// Blitter (BCS) direct submission with a newly bound resource: no ring space is reserved,
// no MI_FLUSH_DW is emitted, and the Drm new-resource-bound flag is left set.
HWTEST_F(DrmDirectSubmissionTest, givenBlitterDispatcherWhenHandleNewResourceThenDoNotFlushTlb) {
    using MI_FLUSH = typename FamilyType::MI_FLUSH_DW;
    using Dispatcher = BlitterDispatcher<FamilyType>;

    // Dedicated BCS context; the submission below is built on it.
    auto bcsContext = std::make_unique<OsContextLinux>(*executionEnvironment.rootDeviceEnvironments[0]->osInterface->get()->getDrm(),
                                                       0u, device->getDeviceBitfield(), EngineTypeUsage{aub_stream::ENGINE_BCS, EngineUsage::Regular}, PreemptionMode::ThreadGroup,
                                                       false);
    MockDrmDirectSubmission<FamilyType, Dispatcher> submission(*device.get(), *bcsContext.get());
    EXPECT_TRUE(submission.allocateResources());

    auto drmMock = static_cast<DrmMock *>(executionEnvironment.rootDeviceEnvironments[0]->osInterface->get()->getDrm());
    drmMock->setNewResourceBound(true);
    EXPECT_EQ(submission.getSizeNewResourceHandler(), 0u);

    submission.handleNewResourcesSubmission();

    HardwareParse parser;
    parser.parsePipeControl = true;
    parser.parseCommands<FamilyType>(submission.ringCommandStream, 0);
    parser.findHardwareCommands<FamilyType>();

    auto *parsedMiFlush = parser.getCommand<MI_FLUSH>();
    EXPECT_EQ(parsedMiFlush, nullptr);

    EXPECT_TRUE(drmMock->getNewResourceBound());
    EXPECT_EQ(submission.getSizeNewResourceHandler(), 0u);
}