Synchronize switching command buffers for all partitions

Signed-off-by: Zbigniew Zdanowicz <zbigniew.zdanowicz@intel.com>
This commit is contained in:
Zbigniew Zdanowicz
2021-09-06 22:29:47 +00:00
committed by Compute-Runtime-Automation
parent 6b062a62b8
commit cd4f3c221a
35 changed files with 271 additions and 112 deletions

View File

@@ -198,8 +198,12 @@ void CommandQueue::waitUntilComplete(uint32_t gpgpuTaskCountToWait, uint32_t bcs
bool forcePowerSavingMode = this->throttle == QueueThrottle::LOW;
getGpgpuCommandStreamReceiver().waitForTaskCountWithKmdNotifyFallback(gpgpuTaskCountToWait, flushStampToWait,
useQuickKmdSleep, forcePowerSavingMode);
getGpgpuCommandStreamReceiver().waitForTaskCountWithKmdNotifyFallback(gpgpuTaskCountToWait,
flushStampToWait,
useQuickKmdSleep,
forcePowerSavingMode,
1u,
0u);
DEBUG_BREAK_IF(getHwTag() < gpgpuTaskCountToWait);
if (gtpinIsGTPinInitialized()) {
@@ -207,7 +211,7 @@ void CommandQueue::waitUntilComplete(uint32_t gpgpuTaskCountToWait, uint32_t bcs
}
if (auto bcsCsr = getBcsCommandStreamReceiver()) {
bcsCsr->waitForTaskCountWithKmdNotifyFallback(bcsTaskCountToWait, 0, false, false);
bcsCsr->waitForTaskCountWithKmdNotifyFallback(bcsTaskCountToWait, 0, false, false, 1u, 0u);
bcsCsr->waitForTaskCountAndCleanTemporaryAllocationList(bcsTaskCountToWait);
}

View File

@@ -63,7 +63,7 @@ class AUBCommandStreamReceiverHw : public CommandStreamReceiverSimulatedHw<GfxFa
MOCKABLE_VIRTUAL void submitBatchBufferAub(uint64_t batchBufferGpuAddress, const void *batchBuffer, size_t batchBufferSize, uint32_t memoryBank, uint64_t entryBits);
void pollForCompletion() override;
void pollForCompletionImpl() override;
void waitForTaskCountWithKmdNotifyFallback(uint32_t taskCountToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep, bool forcePowerSavingMode) override;
void waitForTaskCountWithKmdNotifyFallback(uint32_t taskCountToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep, bool forcePowerSavingMode, uint32_t partitionCount, uint32_t offsetSize) override;
uint32_t getDumpHandle();
MOCKABLE_VIRTUAL void addContextToken(uint32_t dumpHandle);

View File

@@ -599,8 +599,8 @@ void AUBCommandStreamReceiverHw<GfxFamily>::pollForCompletionImpl() {
}
template <typename GfxFamily>
inline void AUBCommandStreamReceiverHw<GfxFamily>::waitForTaskCountWithKmdNotifyFallback(uint32_t taskCountToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep, bool forcePowerSavingMode) {
CommandStreamReceiverSimulatedHw<GfxFamily>::waitForTaskCountWithKmdNotifyFallback(taskCountToWait, flushStampToWait, useQuickKmdSleep, forcePowerSavingMode);
inline void AUBCommandStreamReceiverHw<GfxFamily>::waitForTaskCountWithKmdNotifyFallback(uint32_t taskCountToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep, bool forcePowerSavingMode, uint32_t partitionCount, uint32_t offsetSize) {
CommandStreamReceiverSimulatedHw<GfxFamily>::waitForTaskCountWithKmdNotifyFallback(taskCountToWait, flushStampToWait, useQuickKmdSleep, forcePowerSavingMode, partitionCount, offsetSize);
pollForCompletion();
}

View File

@@ -1,5 +1,5 @@
/*
* Copyright (C) 2018-2020 Intel Corporation
* Copyright (C) 2018-2021 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@@ -40,7 +40,8 @@ class CommandStreamReceiverWithAUBDump : public BaseCSR {
}
void waitForTaskCountWithKmdNotifyFallback(uint32_t taskCountToWait, FlushStamp flushStampToWait,
bool useQuickKmdSleep, bool forcePowerSavingMode) override;
bool useQuickKmdSleep, bool forcePowerSavingMode,
uint32_t partitionCount, uint32_t offsetSize) override;
size_t getPreferredTagPoolSize() const override { return 1; }

View File

@@ -71,12 +71,13 @@ void CommandStreamReceiverWithAUBDump<BaseCSR>::setupContext(OsContext &osContex
template <typename BaseCSR>
void CommandStreamReceiverWithAUBDump<BaseCSR>::waitForTaskCountWithKmdNotifyFallback(uint32_t taskCountToWait, FlushStamp flushStampToWait,
bool useQuickKmdSleep, bool forcePowerSavingMode) {
bool useQuickKmdSleep, bool forcePowerSavingMode,
uint32_t partitionCount, uint32_t offsetSize) {
if (aubCSR) {
aubCSR->waitForTaskCountWithKmdNotifyFallback(taskCountToWait, flushStampToWait, useQuickKmdSleep, forcePowerSavingMode);
aubCSR->waitForTaskCountWithKmdNotifyFallback(taskCountToWait, flushStampToWait, useQuickKmdSleep, forcePowerSavingMode, partitionCount, offsetSize);
}
BaseCSR::waitForTaskCountWithKmdNotifyFallback(taskCountToWait, flushStampToWait, useQuickKmdSleep, forcePowerSavingMode);
BaseCSR::waitForTaskCountWithKmdNotifyFallback(taskCountToWait, flushStampToWait, useQuickKmdSleep, forcePowerSavingMode, partitionCount, offsetSize);
}
template <typename BaseCSR>

View File

@@ -47,7 +47,7 @@ class DrmCommandStreamReceiver : public DeviceCommandStreamReceiver<GfxFamily> {
bool flush(BatchBuffer &batchBuffer, ResidencyContainer &allocationsForResidency) override;
MOCKABLE_VIRTUAL void processResidency(const ResidencyContainer &allocationsForResidency, uint32_t handleId) override;
void makeNonResident(GraphicsAllocation &gfxAllocation) override;
bool waitForFlushStamp(FlushStamp &flushStampToWait) override;
bool waitForFlushStamp(FlushStamp &flushStampToWait, uint32_t partitionCount, uint32_t offsetSize) override;
bool isKmdWaitModeActive() override;
DrmMemoryManager *getMemoryManager() const;
@@ -66,7 +66,7 @@ class DrmCommandStreamReceiver : public DeviceCommandStreamReceiver<GfxFamily> {
protected:
MOCKABLE_VIRTUAL void flushInternal(const BatchBuffer &batchBuffer, const ResidencyContainer &allocationsForResidency);
MOCKABLE_VIRTUAL void exec(const BatchBuffer &batchBuffer, uint32_t vmHandleId, uint32_t drmContextId);
MOCKABLE_VIRTUAL int waitUserFence(uint32_t waitValue);
MOCKABLE_VIRTUAL int waitUserFence(uint32_t waitValue, uint32_t partitionCount, uint32_t offsetSize);
bool isUserFenceWaitActive();
std::vector<BufferObject *> residency;

View File

@@ -218,10 +218,10 @@ GmmPageTableMngr *DrmCommandStreamReceiver<GfxFamily>::createPageTableManager()
}
template <typename GfxFamily>
bool DrmCommandStreamReceiver<GfxFamily>::waitForFlushStamp(FlushStamp &flushStamp) {
bool DrmCommandStreamReceiver<GfxFamily>::waitForFlushStamp(FlushStamp &flushStamp, uint32_t partitionCount, uint32_t offsetSize) {
auto waitValue = static_cast<uint32_t>(flushStamp);
if (isUserFenceWaitActive()) {
waitUserFence(waitValue);
waitUserFence(waitValue, partitionCount, offsetSize);
} else {
this->drm->waitHandle(waitValue, kmdWaitTimeout);
}

View File

@@ -18,7 +18,7 @@ void DrmCommandStreamReceiver<GfxFamily>::flushInternal(const BatchBuffer &batch
}
template <typename GfxFamily>
int DrmCommandStreamReceiver<GfxFamily>::waitUserFence(uint32_t waitValue) {
int DrmCommandStreamReceiver<GfxFamily>::waitUserFence(uint32_t waitValue, uint32_t partitionCount, uint32_t offsetSize) {
uint32_t ctxId = 0u;
uint64_t tagAddress = castToUint64(const_cast<uint32_t *>(getTagAddress()));
if (useContextForUserFenceWait) {

View File

@@ -51,19 +51,28 @@ void DrmCommandStreamReceiver<GfxFamily>::flushInternal(const BatchBuffer &batch
}
template <typename GfxFamily>
int DrmCommandStreamReceiver<GfxFamily>::waitUserFence(uint32_t waitValue) {
int DrmCommandStreamReceiver<GfxFamily>::waitUserFence(uint32_t waitValue, uint32_t partitionCount, uint32_t offsetSize) {
int ret = 0;
StackVec<uint32_t, 32> ctxIds;
uint64_t tagAddress = castToUint64(const_cast<uint32_t *>(getTagAddress()));
if (useContextForUserFenceWait) {
for (auto tileIterator = 0u; tileIterator < this->osContext->getDeviceBitfield().size(); tileIterator++) {
uint32_t ctxId = 0u;
if (this->osContext->getDeviceBitfield().test(tileIterator)) {
ctxId = static_cast<const OsContextLinux *>(osContext)->getDrmContextIds()[tileIterator];
ret |= this->drm->waitUserFence(ctxId, tagAddress, waitValue, Drm::ValueWidth::U32, kmdWaitTimeout, 0u);
ctxIds.push_back(ctxId);
}
}
UNRECOVERABLE_IF(ctxIds.size() != partitionCount);
for (uint32_t i = 0; i < partitionCount; i++) {
ret |= this->drm->waitUserFence(ctxIds[i], tagAddress, waitValue, Drm::ValueWidth::U32, kmdWaitTimeout, 0u);
tagAddress += offsetSize;
}
} else {
ret = this->drm->waitUserFence(0u, tagAddress, waitValue, Drm::ValueWidth::U32, kmdWaitTimeout, 0u);
for (uint32_t i = 0; i < partitionCount; i++) {
ret |= this->drm->waitUserFence(0u, tagAddress, waitValue, Drm::ValueWidth::U32, kmdWaitTimeout, 0u);
tagAddress += offsetSize;
}
}
return ret;

View File

@@ -27,7 +27,7 @@ class WddmCommandStreamReceiver : public DeviceCommandStreamReceiver<GfxFamily>
bool flush(BatchBuffer &batchBuffer, ResidencyContainer &allocationsForResidency) override;
void processResidency(const ResidencyContainer &allocationsForResidency, uint32_t handleId) override;
void processEviction() override;
bool waitForFlushStamp(FlushStamp &flushStampToWait) override;
bool waitForFlushStamp(FlushStamp &flushStampToWait, uint32_t partitionCount, uint32_t offsetSize) override;
WddmMemoryManager *getMemoryManager() const;
Wddm *peekWddm() const {

View File

@@ -132,7 +132,7 @@ WddmMemoryManager *WddmCommandStreamReceiver<GfxFamily>::getMemoryManager() cons
}
template <typename GfxFamily>
bool WddmCommandStreamReceiver<GfxFamily>::waitForFlushStamp(FlushStamp &flushStampToWait) {
bool WddmCommandStreamReceiver<GfxFamily>::waitForFlushStamp(FlushStamp &flushStampToWait, uint32_t partitionCount, uint32_t offsetSize) {
return wddm->waitFromCpu(flushStampToWait, static_cast<OsContextWin *>(this->osContext)->getResidencyController().getMonitoredFence());
}