Synchronize switching command buffers for all partitions

Signed-off-by: Zbigniew Zdanowicz <zbigniew.zdanowicz@intel.com>
This commit is contained in:
Zbigniew Zdanowicz
2021-09-06 22:29:47 +00:00
committed by Compute-Runtime-Automation
parent 6b062a62b8
commit cd4f3c221a
35 changed files with 271 additions and 112 deletions

View File

@@ -257,10 +257,10 @@ void CommandStreamReceiver::cleanupResources() {
}
bool CommandStreamReceiver::waitForCompletionWithTimeout(bool enableTimeout, int64_t timeoutMicroseconds, uint32_t taskCountToWait) {
return waitForCompletionWithTimeout(getTagAddress(), enableTimeout, timeoutMicroseconds, taskCountToWait);
return waitForCompletionWithTimeout(getTagAddress(), enableTimeout, timeoutMicroseconds, taskCountToWait, 1u, 0u);
}
bool CommandStreamReceiver::waitForCompletionWithTimeout(volatile uint32_t *pollAddress, bool enableTimeout, int64_t timeoutMicroseconds, uint32_t taskCountToWait) {
bool CommandStreamReceiver::waitForCompletionWithTimeout(volatile uint32_t *pollAddress, bool enableTimeout, int64_t timeoutMicroseconds, uint32_t taskCountToWait, uint32_t partitionCount, uint32_t offsetSize) {
std::chrono::high_resolution_clock::time_point time1, time2;
int64_t timeDiff = 0;
@@ -275,22 +275,33 @@ bool CommandStreamReceiver::waitForCompletionWithTimeout(volatile uint32_t *poll
}
}
volatile uint32_t *partitionAddress = pollAddress;
time1 = std::chrono::high_resolution_clock::now();
while (*pollAddress < taskCountToWait && timeDiff <= timeoutMicroseconds) {
if (WaitUtils::waitFunction(pollAddress, taskCountToWait)) {
break;
for (uint32_t i = 0; i < partitionCount; i++) {
while (*partitionAddress < taskCountToWait && timeDiff <= timeoutMicroseconds) {
if (WaitUtils::waitFunction(partitionAddress, taskCountToWait)) {
break;
}
if (enableTimeout) {
time2 = std::chrono::high_resolution_clock::now();
timeDiff = std::chrono::duration_cast<std::chrono::microseconds>(time2 - time1).count();
}
}
if (enableTimeout) {
time2 = std::chrono::high_resolution_clock::now();
timeDiff = std::chrono::duration_cast<std::chrono::microseconds>(time2 - time1).count();
}
partitionAddress = ptrOffset(partitionAddress, offsetSize);
}
if (*pollAddress >= taskCountToWait) {
return true;
partitionAddress = pollAddress;
for (uint32_t i = 0; i < partitionCount; i++) {
if (*partitionAddress < taskCountToWait) {
return false;
}
partitionAddress = ptrOffset(partitionAddress, offsetSize);
}
return false;
return true;
}
void CommandStreamReceiver::setTagAllocation(GraphicsAllocation *allocation) {

View File

@@ -124,7 +124,7 @@ class CommandStreamReceiver {
MOCKABLE_VIRTUAL volatile uint32_t *getTagAddress() const { return tagAddress; }
uint64_t getDebugPauseStateGPUAddress() const { return tagAllocation->getGpuAddress() + debugPauseStateAddressOffset; }
virtual bool waitForFlushStamp(FlushStamp &flushStampToWait) { return true; };
virtual bool waitForFlushStamp(FlushStamp &flushStampToWait, uint32_t partitionCount, uint32_t offsetSize) { return true; };
uint32_t peekTaskCount() const { return taskCount; }
@@ -156,9 +156,9 @@ class CommandStreamReceiver {
void requestStallingPipeControlOnNextFlush() { stallingPipeControlOnNextFlushRequired = true; }
bool isStallingPipeControlOnNextFlushRequired() const { return stallingPipeControlOnNextFlushRequired; }
virtual void waitForTaskCountWithKmdNotifyFallback(uint32_t taskCountToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep, bool forcePowerSavingMode) = 0;
virtual void waitForTaskCountWithKmdNotifyFallback(uint32_t taskCountToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep, bool forcePowerSavingMode, uint32_t partitionCount, uint32_t offsetSize) = 0;
virtual bool waitForCompletionWithTimeout(bool enableTimeout, int64_t timeoutMicroseconds, uint32_t taskCountToWait);
MOCKABLE_VIRTUAL bool waitForCompletionWithTimeout(volatile uint32_t *pollAddress, bool enableTimeout, int64_t timeoutMicroseconds, uint32_t taskCountToWait);
MOCKABLE_VIRTUAL bool waitForCompletionWithTimeout(volatile uint32_t *pollAddress, bool enableTimeout, int64_t timeoutMicroseconds, uint32_t taskCountToWait, uint32_t partitionCount, uint32_t offsetSize);
virtual void downloadAllocations(){};
void setSamplerCacheFlushRequired(SamplerCacheFlushState value) { this->samplerCacheFlushRequired = value; }

View File

@@ -76,7 +76,7 @@ class CommandStreamReceiverHw : public CommandStreamReceiver {
bool isPipelineSelectAlreadyProgrammed() const;
void programComputeMode(LinearStream &csr, DispatchFlags &dispatchFlags, const HardwareInfo &hwInfo);
void waitForTaskCountWithKmdNotifyFallback(uint32_t taskCountToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep, bool forcePowerSavingMode) override;
void waitForTaskCountWithKmdNotifyFallback(uint32_t taskCountToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep, bool forcePowerSavingMode, uint32_t partitionCount, uint32_t offsetSize) override;
const HardwareInfo &peekHwInfo() const;
void collectStateBaseAddresPatchInfo(

View File

@@ -865,7 +865,7 @@ inline void CommandStreamReceiverHw<GfxFamily>::emitNoop(LinearStream &commandSt
}
template <typename GfxFamily>
inline void CommandStreamReceiverHw<GfxFamily>::waitForTaskCountWithKmdNotifyFallback(uint32_t taskCountToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep, bool forcePowerSavingMode) {
inline void CommandStreamReceiverHw<GfxFamily>::waitForTaskCountWithKmdNotifyFallback(uint32_t taskCountToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep, bool forcePowerSavingMode, uint32_t partitionCount, uint32_t offsetSize) {
updateTagFromWait();
int64_t waitTimeout = 0;
@@ -877,11 +877,20 @@ inline void CommandStreamReceiverHw<GfxFamily>::waitForTaskCountWithKmdNotifyFal
"\nWaiting for task count %u at location %p. Current value: %u\n",
taskCountToWait, getTagAddress(), *getTagAddress());
auto status = waitForCompletionWithTimeout(enableTimeout, waitTimeout, taskCountToWait);
bool status;
if (partitionCount > 1) {
status = waitForCompletionWithTimeout(getTagAddress(), enableTimeout, waitTimeout, taskCountToWait, partitionCount, offsetSize);
} else {
status = waitForCompletionWithTimeout(enableTimeout, waitTimeout, taskCountToWait);
}
if (!status) {
waitForFlushStamp(flushStampToWait);
waitForFlushStamp(flushStampToWait, partitionCount, offsetSize);
//now call blocking wait, this is to ensure that task count is reached
waitForCompletionWithTimeout(false, 0, taskCountToWait);
if (partitionCount > 1) {
status = waitForCompletionWithTimeout(getTagAddress(), false, 0, taskCountToWait, partitionCount, offsetSize);
} else {
status = waitForCompletionWithTimeout(false, 0, taskCountToWait);
}
}
UNRECOVERABLE_IF(*getTagAddress() < taskCountToWait);
@@ -1116,7 +1125,7 @@ uint32_t CommandStreamReceiverHw<GfxFamily>::blitBuffer(const BlitPropertiesCont
lock.unlock();
if (blocking) {
waitForTaskCountWithKmdNotifyFallback(newTaskCount, flushStampToWait, false, false);
waitForTaskCountWithKmdNotifyFallback(newTaskCount, flushStampToWait, false, false, 1, 0);
internalAllocationStorage->cleanAllocationList(newTaskCount, TEMPORARY_ALLOCATION);
}

View File

@@ -44,7 +44,7 @@ class TbxCommandStreamReceiverHw : public CommandStreamReceiverSimulatedHw<GfxFa
bool flush(BatchBuffer &batchBuffer, ResidencyContainer &allocationsForResidency) override;
void waitForTaskCountWithKmdNotifyFallback(uint32_t taskCountToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep, bool forcePowerSavingMode) override;
void waitForTaskCountWithKmdNotifyFallback(uint32_t taskCountToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep, bool forcePowerSavingMode, uint32_t partitionCount, uint32_t offsetSize) override;
bool waitForCompletionWithTimeout(bool enableTimeout, int64_t timeoutMicroseconds, uint32_t taskCountToWait) override;
void downloadAllocations() override;

View File

@@ -481,9 +481,9 @@ void TbxCommandStreamReceiverHw<GfxFamily>::flushSubmissionsAndDownloadAllocatio
}
template <typename GfxFamily>
void TbxCommandStreamReceiverHw<GfxFamily>::waitForTaskCountWithKmdNotifyFallback(uint32_t taskCountToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep, bool forcePowerSavingMode) {
void TbxCommandStreamReceiverHw<GfxFamily>::waitForTaskCountWithKmdNotifyFallback(uint32_t taskCountToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep, bool forcePowerSavingMode, uint32_t partitionCount, uint32_t offsetSize) {
flushSubmissionsAndDownloadAllocations();
BaseClass::waitForTaskCountWithKmdNotifyFallback(taskCountToWait, flushStampToWait, useQuickKmdSleep, forcePowerSavingMode);
BaseClass::waitForTaskCountWithKmdNotifyFallback(taskCountToWait, flushStampToWait, useQuickKmdSleep, forcePowerSavingMode, partitionCount, offsetSize);
}
template <typename GfxFamily>

View File

@@ -77,7 +77,7 @@ class MockCommandStreamReceiver : public CommandStreamReceiver {
return true;
}
void waitForTaskCountWithKmdNotifyFallback(uint32_t taskCountToWait, FlushStamp flushStampToWait, bool quickKmdSleep, bool forcePowerSavingMode) override {
void waitForTaskCountWithKmdNotifyFallback(uint32_t taskCountToWait, FlushStamp flushStampToWait, bool quickKmdSleep, bool forcePowerSavingMode, uint32_t partitionCount, uint32_t offsetSize) override {
}
uint32_t blitBuffer(const BlitPropertiesContainer &blitPropertiesContainer, bool blocking, bool profilingEnabled, Device &device) override { return taskCount; };