feature: use completion fence value from direct submission when available

Related-To: NEO-6643
Signed-off-by: Mateusz Jablonski <mateusz.jablonski@intel.com>
This commit is contained in:
Mateusz Jablonski
2022-04-26 13:29:31 +00:00
committed by Compute-Runtime-Automation
parent 702feba063
commit ffd1c430b1
9 changed files with 82 additions and 4 deletions

View File

@@ -849,6 +849,9 @@ const RootDeviceEnvironment &CommandStreamReceiver::peekRootDeviceEnvironment()
}
uint32_t CommandStreamReceiver::getCompletionValue(const GraphicsAllocation &gfxAllocation) {
if (completionFenceValuePointer) {
return *completionFenceValuePointer;
}
auto osContextId = osContext->getContextId();
return gfxAllocation.getTaskCount(osContextId);
}

View File

@@ -388,6 +388,7 @@ class CommandStreamReceiver {
IndirectHeap *indirectHeap[IndirectHeap::Type::NUM_TYPES];
OsContext *osContext = nullptr;
// Optional source of the completion value. When non-null, getCompletionValue()
// returns the pointed-to value instead of the allocation's task count.
// It is assigned in initDirectSubmission() from the direct submission's
// getCompletionValuePointer().
uint32_t *completionFenceValuePointer = nullptr;
// current taskLevel. Used for determining if a PIPE_CONTROL is needed.
std::atomic<uint32_t> taskLevel{0};
@@ -414,6 +415,7 @@ class CommandStreamReceiver {
uint32_t activePartitionsConfig = 1;
uint32_t postSyncWriteOffset = 0;
uint32_t completionFenceOffset = 0;
// Receiver-owned copy of the completion fence value. The
// CommandStreamReceiverHw destructor stores the last value read through
// completionFenceValuePointer here and repoints that pointer at this member.
uint32_t completionFenceValue = 0;
const uint32_t rootDeviceIndex;
const DeviceBitfield deviceBitfield;

View File

@@ -44,6 +44,10 @@ namespace NEO {
// Unregisters direct submission from its controller and, if a completion fence
// value pointer is in use, snapshots the pointed-to value into this receiver's
// own completionFenceValue and redirects the pointer to that copy.
// NOTE(review): presumably this keeps getCompletionValue() usable after the
// direct-submission object that supplied the pointer is destroyed - confirm.
template <typename GfxFamily>
CommandStreamReceiverHw<GfxFamily>::~CommandStreamReceiverHw() {
    this->unregisterDirectSubmissionFromController();

    if (this->completionFenceValuePointer == nullptr) {
        return;
    }

    // Copy first, then repoint: the copy must be read through the old pointer.
    this->completionFenceValue = *this->completionFenceValuePointer;
    this->completionFenceValuePointer = &this->completionFenceValue;
}
template <typename GfxFamily>
@@ -1334,10 +1338,12 @@ inline bool CommandStreamReceiverHw<GfxFamily>::initDirectSubmission() {
if (EngineHelpers::isBcs(this->osContext->getEngineType())) {
blitterDirectSubmission = DirectSubmissionHw<GfxFamily, BlitterDispatcher<GfxFamily>>::create(*this);
ret = blitterDirectSubmission->initialize(submitOnInit, this->isUsedNotifyEnableForPostSync());
completionFenceValuePointer = blitterDirectSubmission->getCompletionValuePointer();
} else {
directSubmission = DirectSubmissionHw<GfxFamily, RenderDispatcher<GfxFamily>>::create(*this);
ret = directSubmission->initialize(submitOnInit, this->isUsedNotifyEnableForPostSync());
completionFenceValuePointer = directSubmission->getCompletionValuePointer();
}
auto directSubmissionController = executionEnvironment.initializeDirectSubmissionController();
if (directSubmissionController) {

View File

@@ -85,6 +85,8 @@ class DirectSubmissionHw {
static std::unique_ptr<DirectSubmissionHw<GfxFamily, Dispatcher>> create(const DirectSubmissionInputParams &inputParams);
// Returns a pointer to the completion value tracked by this direct submission.
// The default implementation returns nullptr; subclasses may override it to
// return a real address.
virtual uint32_t *getCompletionValuePointer() { return nullptr; }
protected:
static constexpr size_t prefetchSize = 8 * MemoryConstants::cacheLineSize;
static constexpr size_t prefetchNoops = prefetchSize / sizeof(uint32_t);

View File

@@ -20,6 +20,13 @@ class DrmDirectSubmission : public DirectSubmissionHw<GfxFamily, Dispatcher> {
~DrmDirectSubmission();
// Returns the address of completionFenceValue when a completion fence
// allocation is set; otherwise defers to the base-class implementation.
uint32_t *getCompletionValuePointer() override {
    if (!this->completionFenceAllocation) {
        return DirectSubmissionHw<GfxFamily, Dispatcher>::getCompletionValuePointer();
    }
    return &this->completionFenceValue;
}
protected:
bool allocateOsResources() override;
bool submit(uint64_t gpuAddress, size_t size) override;

View File

@@ -17,6 +17,7 @@ class TestedDrmCommandStreamReceiver : public DrmCommandStreamReceiver<GfxFamily
using CommandStreamReceiver::activePartitions;
using CommandStreamReceiver::clearColorAllocation;
using CommandStreamReceiver::commandStream;
using CommandStreamReceiver::completionFenceValuePointer;
using CommandStreamReceiver::createPreemptionAllocation;
using CommandStreamReceiver::flushStamp;
using CommandStreamReceiver::getTagAddress;

View File

@@ -140,6 +140,30 @@ HWTEST_F(DrmDirectSubmissionTest, givenCompletionFenceSupportWhenCreateDrmDirect
}
}
// With the completion fence enabled through the debug flag, the DRM direct
// submission must expose the address of its completionFenceValue member.
HWTEST_F(DrmDirectSubmissionTest, givenCompletionFenceSupportWhenGettingCompletionFencePointerThenCompletionFenceValueAddressIsReturned) {
    DebugManagerStateRestore restorer;
    DebugManager.flags.EnableDrmCompletionFence.set(1);

    auto &csr = *device->getDefaultEngine().commandStreamReceiver;
    auto drm = executionEnvironment.rootDeviceEnvironments[0]->osInterface->getDriverModel()->as<Drm>();

    // Precondition: the flag above must result in completion fence support.
    ASSERT_TRUE(drm->completionFenceSupport());

    MockDrmDirectSubmission<FamilyType, RenderDispatcher<FamilyType>> drmDirectSubmission(csr);

    uint32_t *const expectedPointer = &drmDirectSubmission.completionFenceValue;
    EXPECT_EQ(expectedPointer, drmDirectSubmission.getCompletionValuePointer());
}
// With the completion fence disabled through the debug flag, the DRM direct
// submission must report no completion value pointer.
HWTEST_F(DrmDirectSubmissionTest, givenNoCompletionFenceSupportWhenGettingCompletionFencePointerThenNullptrIsReturned) {
    DebugManagerStateRestore restorer;
    DebugManager.flags.EnableDrmCompletionFence.set(0);

    auto &csr = *device->getDefaultEngine().commandStreamReceiver;
    auto drm = executionEnvironment.rootDeviceEnvironments[0]->osInterface->getDriverModel()->as<Drm>();

    // Precondition: the flag above must result in no completion fence support.
    ASSERT_FALSE(drm->completionFenceSupport());

    MockDrmDirectSubmission<FamilyType, RenderDispatcher<FamilyType>> drmDirectSubmission(csr);

    uint32_t *returnedPointer = drmDirectSubmission.getCompletionValuePointer();
    EXPECT_EQ(nullptr, returnedPointer);
}
HWTEST_F(DrmDirectSubmissionTest, givenNoCompletionFenceSupportWhenCreateDrmDirectSubmissionThenCompletionFenceAllocationIsNotSet) {
DebugManagerStateRestore restorer;
DebugManager.flags.EnableDrmCompletionFence.set(0);

View File

@@ -19,6 +19,7 @@ template <typename GfxFamily>
// Test double for DrmCommandStreamReceiver: inherits the base constructors and
// re-exports selected base members so tests can access them directly.
struct MockDrmCsr : public DrmCommandStreamReceiver<GfxFamily> {
// Inherit all constructors of the real DRM command stream receiver.
using DrmCommandStreamReceiver<GfxFamily>::DrmCommandStreamReceiver;
using DrmCommandStreamReceiver<GfxFamily>::dispatchMode;
// Exposed so tests can inspect and write the completion fence value pointer.
using DrmCommandStreamReceiver<GfxFamily>::completionFenceValuePointer;
};
HWTEST_TEMPLATED_F(DrmCommandStreamTest, givenL0ApiConfigWhenCreatingDrmCsrThenEnableImmediateDispatch) {
@@ -34,6 +35,34 @@ HWTEST_TEMPLATED_F(DrmCommandStreamTest, whenGettingCompletionValueThenTaskCount
EXPECT_EQ(expectedValue, csr->getCompletionValue(allocation));
}
// Once direct submission is initialized with the completion fence enabled,
// getCompletionValue() must return the value behind completionFenceValuePointer
// rather than the allocation's task count.
HWTEST_TEMPLATED_F(DrmCommandStreamTest, givenEnabledDirectSubmissionWhenGettingCompletionValueThenCompletionFenceValueIsReturned) {
    DebugManagerStateRestore restorer;
    DebugManager.flags.EnableDrmCompletionFence.set(1);
    DebugManager.flags.EnableDirectSubmission.set(1);
    DebugManager.flags.DirectSubmissionDisableMonitorFence.set(0);

    MockDrmCsr<FamilyType> csr(executionEnvironment, 0, 1, gemCloseWorkerMode::gemCloseWorkerInactive);
    csr.setupContext(*osContext);

    // Before direct submission is initialized the pointer is not set.
    EXPECT_EQ(nullptr, csr.completionFenceValuePointer);

    // Mark the tested engine as direct-submission capable in the hardware info.
    auto hwInfo = executionEnvironment.rootDeviceEnvironments[0]->getMutableHardwareInfo();
    auto &engineProperties = hwInfo->capabilityTable.directSubmissionEngines.data[osContext->getEngineType()];
    engineProperties.engineSupported = true;
    engineProperties.submitOnInit = true;
    engineProperties.useNonDefault = true;

    csr.createGlobalFenceAllocation();
    csr.initializeTagAllocation();
    csr.initDirectSubmission();

    EXPECT_NE(nullptr, csr.completionFenceValuePointer);

    // Write a known value through the pointer and give the allocation a
    // different task count, so the two sources can be told apart.
    const uint32_t fenceValue = 0x5678;
    *csr.completionFenceValuePointer = fenceValue;

    MockGraphicsAllocation allocation{};
    const uint32_t allocationTaskCount = 0x1234;
    allocation.updateTaskCount(allocationTaskCount, osContext->getContextId());

    EXPECT_EQ(fenceValue, csr.getCompletionValue(allocation));

    // Reset the value written above before the receiver goes out of scope.
    *csr.completionFenceValuePointer = 0;
}
HWTEST_TEMPLATED_F(DrmCommandStreamTest, whenGettingCompletionAddressThenOffsettedTagAddressIsReturned) {
csr->initializeTagAllocation();
EXPECT_NE(nullptr, csr->getTagAddress());