mirror of
https://github.com/intel/compute-runtime.git
synced 2026-01-06 10:26:29 +08:00
feature: use completion fence value from direct submission when available
Related-To: NEO-6643
Signed-off-by: Mateusz Jablonski <mateusz.jablonski@intel.com>
This commit is contained in:
committed by
Compute-Runtime-Automation
parent
702feba063
commit
ffd1c430b1
@@ -849,6 +849,9 @@ const RootDeviceEnvironment &CommandStreamReceiver::peekRootDeviceEnvironment()
|
||||
}
|
||||
|
||||
uint32_t CommandStreamReceiver::getCompletionValue(const GraphicsAllocation &gfxAllocation) {
|
||||
if (completionFenceValuePointer) {
|
||||
return *completionFenceValuePointer;
|
||||
}
|
||||
auto osContextId = osContext->getContextId();
|
||||
return gfxAllocation.getTaskCount(osContextId);
|
||||
}
|
||||
|
||||
@@ -388,6 +388,7 @@ class CommandStreamReceiver {
|
||||
|
||||
IndirectHeap *indirectHeap[IndirectHeap::Type::NUM_TYPES];
|
||||
OsContext *osContext = nullptr;
|
||||
uint32_t *completionFenceValuePointer = nullptr;
|
||||
|
||||
// current taskLevel. Used for determining if a PIPE_CONTROL is needed.
|
||||
std::atomic<uint32_t> taskLevel{0};
|
||||
@@ -414,6 +415,7 @@ class CommandStreamReceiver {
|
||||
uint32_t activePartitionsConfig = 1;
|
||||
uint32_t postSyncWriteOffset = 0;
|
||||
uint32_t completionFenceOffset = 0;
|
||||
uint32_t completionFenceValue = 0;
|
||||
|
||||
const uint32_t rootDeviceIndex;
|
||||
const DeviceBitfield deviceBitfield;
|
||||
|
||||
@@ -44,6 +44,10 @@ namespace NEO {
|
||||
template <typename GfxFamily>
|
||||
CommandStreamReceiverHw<GfxFamily>::~CommandStreamReceiverHw() {
|
||||
this->unregisterDirectSubmissionFromController();
|
||||
if (completionFenceValuePointer) {
|
||||
completionFenceValue = *completionFenceValuePointer;
|
||||
completionFenceValuePointer = &completionFenceValue;
|
||||
}
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
@@ -1334,10 +1338,12 @@ inline bool CommandStreamReceiverHw<GfxFamily>::initDirectSubmission() {
|
||||
if (EngineHelpers::isBcs(this->osContext->getEngineType())) {
|
||||
blitterDirectSubmission = DirectSubmissionHw<GfxFamily, BlitterDispatcher<GfxFamily>>::create(*this);
|
||||
ret = blitterDirectSubmission->initialize(submitOnInit, this->isUsedNotifyEnableForPostSync());
|
||||
completionFenceValuePointer = blitterDirectSubmission->getCompletionValuePointer();
|
||||
|
||||
} else {
|
||||
directSubmission = DirectSubmissionHw<GfxFamily, RenderDispatcher<GfxFamily>>::create(*this);
|
||||
ret = directSubmission->initialize(submitOnInit, this->isUsedNotifyEnableForPostSync());
|
||||
completionFenceValuePointer = directSubmission->getCompletionValuePointer();
|
||||
}
|
||||
auto directSubmissionController = executionEnvironment.initializeDirectSubmissionController();
|
||||
if (directSubmissionController) {
|
||||
|
||||
@@ -85,6 +85,8 @@ class DirectSubmissionHw {
|
||||
|
||||
static std::unique_ptr<DirectSubmissionHw<GfxFamily, Dispatcher>> create(const DirectSubmissionInputParams &inputParams);
|
||||
|
||||
// Returns a pointer to the completion value tracked by this direct
// submission, or nullptr when there is none. This default implementation
// always returns nullptr; derived classes may override it.
virtual uint32_t *getCompletionValuePointer() {
    return nullptr;
}
|
||||
|
||||
protected:
|
||||
static constexpr size_t prefetchSize = 8 * MemoryConstants::cacheLineSize;
|
||||
static constexpr size_t prefetchNoops = prefetchSize / sizeof(uint32_t);
|
||||
|
||||
@@ -20,6 +20,13 @@ class DrmDirectSubmission : public DirectSubmissionHw<GfxFamily, Dispatcher> {
|
||||
|
||||
~DrmDirectSubmission();
|
||||
|
||||
uint32_t *getCompletionValuePointer() override {
|
||||
if (this->completionFenceAllocation) {
|
||||
return &this->completionFenceValue;
|
||||
}
|
||||
return DirectSubmissionHw<GfxFamily, Dispatcher>::getCompletionValuePointer();
|
||||
}
|
||||
|
||||
protected:
|
||||
bool allocateOsResources() override;
|
||||
bool submit(uint64_t gpuAddress, size_t size) override;
|
||||
|
||||
@@ -17,6 +17,7 @@ class TestedDrmCommandStreamReceiver : public DrmCommandStreamReceiver<GfxFamily
|
||||
using CommandStreamReceiver::activePartitions;
|
||||
using CommandStreamReceiver::clearColorAllocation;
|
||||
using CommandStreamReceiver::commandStream;
|
||||
using CommandStreamReceiver::completionFenceValuePointer;
|
||||
using CommandStreamReceiver::createPreemptionAllocation;
|
||||
using CommandStreamReceiver::flushStamp;
|
||||
using CommandStreamReceiver::getTagAddress;
|
||||
|
||||
@@ -140,6 +140,30 @@ HWTEST_F(DrmDirectSubmissionTest, givenCompletionFenceSupportWhenCreateDrmDirect
|
||||
}
|
||||
}
|
||||
|
||||
// With the DRM completion fence enabled, the direct submission object must
// hand out the address of its own completionFenceValue member.
HWTEST_F(DrmDirectSubmissionTest, givenCompletionFenceSupportWhenGettingCompletionFencePointerThenCompletionFenceValueAddressIsReturned) {
    DebugManagerStateRestore restorer;
    DebugManager.flags.EnableDrmCompletionFence.set(1);

    auto &csr = *device->getDefaultEngine().commandStreamReceiver;
    auto &rootDeviceEnvironment = *executionEnvironment.rootDeviceEnvironments[0];
    auto drmModel = rootDeviceEnvironment.osInterface->getDriverModel()->as<Drm>();

    // Precondition: the rest of the test is meaningless without fence support.
    ASSERT_TRUE(drmModel->completionFenceSupport());

    MockDrmDirectSubmission<FamilyType, RenderDispatcher<FamilyType>> drmDirectSubmission(csr);

    auto *expectedPointer = &drmDirectSubmission.completionFenceValue;
    auto *returnedPointer = drmDirectSubmission.getCompletionValuePointer();
    EXPECT_EQ(expectedPointer, returnedPointer);
}
|
||||
|
||||
// With the DRM completion fence disabled, the direct submission object must
// report that it has no completion value by returning nullptr.
HWTEST_F(DrmDirectSubmissionTest, givenNoCompletionFenceSupportWhenGettingCompletionFencePointerThenNullptrIsReturned) {
    DebugManagerStateRestore restorer;
    DebugManager.flags.EnableDrmCompletionFence.set(0);

    auto &csr = *device->getDefaultEngine().commandStreamReceiver;
    auto &rootDeviceEnvironment = *executionEnvironment.rootDeviceEnvironments[0];
    auto drmModel = rootDeviceEnvironment.osInterface->getDriverModel()->as<Drm>();

    // Precondition: fence support must really be off for this scenario.
    ASSERT_FALSE(drmModel->completionFenceSupport());

    MockDrmDirectSubmission<FamilyType, RenderDispatcher<FamilyType>> drmDirectSubmission(csr);

    auto *returnedPointer = drmDirectSubmission.getCompletionValuePointer();
    EXPECT_EQ(nullptr, returnedPointer);
}
|
||||
|
||||
HWTEST_F(DrmDirectSubmissionTest, givenNoCompletionFenceSupportWhenCreateDrmDirectSubmissionThenCompletionFenceAllocationIsNotSet) {
|
||||
DebugManagerStateRestore restorer;
|
||||
DebugManager.flags.EnableDrmCompletionFence.set(0);
|
||||
|
||||
@@ -19,6 +19,7 @@ template <typename GfxFamily>
|
||||
// Test double for DrmCommandStreamReceiver: inherits the base constructors
// and re-exports selected base members so tests can read and write them.
struct MockDrmCsr : public DrmCommandStreamReceiver<GfxFamily> {
    // Inherited constructors.
    using DrmCommandStreamReceiver<GfxFamily>::DrmCommandStreamReceiver;

    // Base members made accessible to tests.
    using DrmCommandStreamReceiver<GfxFamily>::completionFenceValuePointer;
    using DrmCommandStreamReceiver<GfxFamily>::dispatchMode;
};
|
||||
|
||||
HWTEST_TEMPLATED_F(DrmCommandStreamTest, givenL0ApiConfigWhenCreatingDrmCsrThenEnableImmediateDispatch) {
|
||||
@@ -34,6 +35,34 @@ HWTEST_TEMPLATED_F(DrmCommandStreamTest, whenGettingCompletionValueThenTaskCount
|
||||
EXPECT_EQ(expectedValue, csr->getCompletionValue(allocation));
|
||||
}
|
||||
|
||||
// Verifies that once direct submission is initialized with the DRM completion
// fence enabled, getCompletionValue() reports the value stored behind
// completionFenceValuePointer rather than the allocation's task count.
HWTEST_TEMPLATED_F(DrmCommandStreamTest, givenEnabledDirectSubmissionWhenGettingCompletionValueThenCompletionFenceValueIsReturned) {
    DebugManagerStateRestore restorer;
    DebugManager.flags.EnableDrmCompletionFence.set(1);
    DebugManager.flags.EnableDirectSubmission.set(1);
    DebugManager.flags.DirectSubmissionDisableMonitorFence.set(0);

    MockDrmCsr<FamilyType> csr(executionEnvironment, 0, 1, gemCloseWorkerMode::gemCloseWorkerInactive);
    csr.setupContext(*osContext);

    // Before direct submission is initialized no fence value source is installed.
    EXPECT_EQ(nullptr, csr.completionFenceValuePointer);

    // Mark the context's engine as direct-submission capable in the capability table.
    auto hwInfo = executionEnvironment.rootDeviceEnvironments[0]->getMutableHardwareInfo();
    hwInfo->capabilityTable.directSubmissionEngines.data[osContext->getEngineType()].engineSupported = true;
    hwInfo->capabilityTable.directSubmissionEngines.data[osContext->getEngineType()].submitOnInit = true;
    hwInfo->capabilityTable.directSubmissionEngines.data[osContext->getEngineType()].useNonDefault = true;

    csr.createGlobalFenceAllocation();
    csr.initializeTagAllocation();
    csr.initDirectSubmission();

    // Fatal assertion: the pointer is dereferenced below, so a null pointer must
    // abort the test here instead of turning into a null-pointer write.
    ASSERT_NE(nullptr, csr.completionFenceValuePointer);

    uint32_t expectedValue = 0x5678;
    *csr.completionFenceValuePointer = expectedValue;

    // Give the allocation a different task count so the two sources are distinguishable.
    MockGraphicsAllocation allocation{};
    uint32_t notExpectedValue = 0x1234;
    allocation.updateTaskCount(notExpectedValue, osContext->getContextId());

    EXPECT_EQ(expectedValue, csr.getCompletionValue(allocation));

    // Restore the fence value written above.
    // NOTE(review): presumably so that teardown does not observe the fake value - confirm.
    *csr.completionFenceValuePointer = 0;
}
|
||||
|
||||
HWTEST_TEMPLATED_F(DrmCommandStreamTest, whenGettingCompletionAddressThenOffsettedTagAddressIsReturned) {
|
||||
csr->initializeTagAllocation();
|
||||
EXPECT_NE(nullptr, csr->getTagAddress());
|
||||
|
||||
Reference in New Issue
Block a user