mirror of
https://github.com/intel/compute-runtime.git
synced 2025-12-21 09:14:47 +08:00
Synchronize switching command buffers for all partitions
Signed-off-by: Zbigniew Zdanowicz <zbigniew.zdanowicz@intel.com>
This commit is contained in:
committed by
Compute-Runtime-Automation
parent
6b062a62b8
commit
cd4f3c221a
@@ -257,10 +257,10 @@ void CommandStreamReceiver::cleanupResources() {
|
||||
}
|
||||
|
||||
bool CommandStreamReceiver::waitForCompletionWithTimeout(bool enableTimeout, int64_t timeoutMicroseconds, uint32_t taskCountToWait) {
|
||||
return waitForCompletionWithTimeout(getTagAddress(), enableTimeout, timeoutMicroseconds, taskCountToWait);
|
||||
return waitForCompletionWithTimeout(getTagAddress(), enableTimeout, timeoutMicroseconds, taskCountToWait, 1u, 0u);
|
||||
}
|
||||
|
||||
bool CommandStreamReceiver::waitForCompletionWithTimeout(volatile uint32_t *pollAddress, bool enableTimeout, int64_t timeoutMicroseconds, uint32_t taskCountToWait) {
|
||||
bool CommandStreamReceiver::waitForCompletionWithTimeout(volatile uint32_t *pollAddress, bool enableTimeout, int64_t timeoutMicroseconds, uint32_t taskCountToWait, uint32_t partitionCount, uint32_t offsetSize) {
|
||||
std::chrono::high_resolution_clock::time_point time1, time2;
|
||||
int64_t timeDiff = 0;
|
||||
|
||||
@@ -275,22 +275,33 @@ bool CommandStreamReceiver::waitForCompletionWithTimeout(volatile uint32_t *poll
|
||||
}
|
||||
}
|
||||
|
||||
volatile uint32_t *partitionAddress = pollAddress;
|
||||
|
||||
time1 = std::chrono::high_resolution_clock::now();
|
||||
while (*pollAddress < taskCountToWait && timeDiff <= timeoutMicroseconds) {
|
||||
if (WaitUtils::waitFunction(pollAddress, taskCountToWait)) {
|
||||
break;
|
||||
for (uint32_t i = 0; i < partitionCount; i++) {
|
||||
while (*partitionAddress < taskCountToWait && timeDiff <= timeoutMicroseconds) {
|
||||
if (WaitUtils::waitFunction(partitionAddress, taskCountToWait)) {
|
||||
break;
|
||||
}
|
||||
|
||||
if (enableTimeout) {
|
||||
time2 = std::chrono::high_resolution_clock::now();
|
||||
timeDiff = std::chrono::duration_cast<std::chrono::microseconds>(time2 - time1).count();
|
||||
}
|
||||
}
|
||||
|
||||
if (enableTimeout) {
|
||||
time2 = std::chrono::high_resolution_clock::now();
|
||||
timeDiff = std::chrono::duration_cast<std::chrono::microseconds>(time2 - time1).count();
|
||||
}
|
||||
partitionAddress = ptrOffset(partitionAddress, offsetSize);
|
||||
}
|
||||
|
||||
if (*pollAddress >= taskCountToWait) {
|
||||
return true;
|
||||
partitionAddress = pollAddress;
|
||||
for (uint32_t i = 0; i < partitionCount; i++) {
|
||||
if (*partitionAddress < taskCountToWait) {
|
||||
return false;
|
||||
}
|
||||
|
||||
partitionAddress = ptrOffset(partitionAddress, offsetSize);
|
||||
}
|
||||
return false;
|
||||
return true;
|
||||
}
|
||||
|
||||
void CommandStreamReceiver::setTagAllocation(GraphicsAllocation *allocation) {
|
||||
|
||||
@@ -124,7 +124,7 @@ class CommandStreamReceiver {
|
||||
MOCKABLE_VIRTUAL volatile uint32_t *getTagAddress() const { return tagAddress; }
|
||||
uint64_t getDebugPauseStateGPUAddress() const { return tagAllocation->getGpuAddress() + debugPauseStateAddressOffset; }
|
||||
|
||||
virtual bool waitForFlushStamp(FlushStamp &flushStampToWait) { return true; };
|
||||
virtual bool waitForFlushStamp(FlushStamp &flushStampToWait, uint32_t partitionCount, uint32_t offsetSize) { return true; };
|
||||
|
||||
uint32_t peekTaskCount() const { return taskCount; }
|
||||
|
||||
@@ -156,9 +156,9 @@ class CommandStreamReceiver {
|
||||
void requestStallingPipeControlOnNextFlush() { stallingPipeControlOnNextFlushRequired = true; }
|
||||
bool isStallingPipeControlOnNextFlushRequired() const { return stallingPipeControlOnNextFlushRequired; }
|
||||
|
||||
virtual void waitForTaskCountWithKmdNotifyFallback(uint32_t taskCountToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep, bool forcePowerSavingMode) = 0;
|
||||
virtual void waitForTaskCountWithKmdNotifyFallback(uint32_t taskCountToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep, bool forcePowerSavingMode, uint32_t partitionCount, uint32_t offsetSize) = 0;
|
||||
virtual bool waitForCompletionWithTimeout(bool enableTimeout, int64_t timeoutMicroseconds, uint32_t taskCountToWait);
|
||||
MOCKABLE_VIRTUAL bool waitForCompletionWithTimeout(volatile uint32_t *pollAddress, bool enableTimeout, int64_t timeoutMicroseconds, uint32_t taskCountToWait);
|
||||
MOCKABLE_VIRTUAL bool waitForCompletionWithTimeout(volatile uint32_t *pollAddress, bool enableTimeout, int64_t timeoutMicroseconds, uint32_t taskCountToWait, uint32_t partitionCount, uint32_t offsetSize);
|
||||
virtual void downloadAllocations(){};
|
||||
|
||||
void setSamplerCacheFlushRequired(SamplerCacheFlushState value) { this->samplerCacheFlushRequired = value; }
|
||||
|
||||
@@ -76,7 +76,7 @@ class CommandStreamReceiverHw : public CommandStreamReceiver {
|
||||
bool isPipelineSelectAlreadyProgrammed() const;
|
||||
void programComputeMode(LinearStream &csr, DispatchFlags &dispatchFlags, const HardwareInfo &hwInfo);
|
||||
|
||||
void waitForTaskCountWithKmdNotifyFallback(uint32_t taskCountToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep, bool forcePowerSavingMode) override;
|
||||
void waitForTaskCountWithKmdNotifyFallback(uint32_t taskCountToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep, bool forcePowerSavingMode, uint32_t partitionCount, uint32_t offsetSize) override;
|
||||
const HardwareInfo &peekHwInfo() const;
|
||||
|
||||
void collectStateBaseAddresPatchInfo(
|
||||
|
||||
@@ -865,7 +865,7 @@ inline void CommandStreamReceiverHw<GfxFamily>::emitNoop(LinearStream &commandSt
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
inline void CommandStreamReceiverHw<GfxFamily>::waitForTaskCountWithKmdNotifyFallback(uint32_t taskCountToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep, bool forcePowerSavingMode) {
|
||||
inline void CommandStreamReceiverHw<GfxFamily>::waitForTaskCountWithKmdNotifyFallback(uint32_t taskCountToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep, bool forcePowerSavingMode, uint32_t partitionCount, uint32_t offsetSize) {
|
||||
updateTagFromWait();
|
||||
|
||||
int64_t waitTimeout = 0;
|
||||
@@ -877,11 +877,20 @@ inline void CommandStreamReceiverHw<GfxFamily>::waitForTaskCountWithKmdNotifyFal
|
||||
"\nWaiting for task count %u at location %p. Current value: %u\n",
|
||||
taskCountToWait, getTagAddress(), *getTagAddress());
|
||||
|
||||
auto status = waitForCompletionWithTimeout(enableTimeout, waitTimeout, taskCountToWait);
|
||||
bool status;
|
||||
if (partitionCount > 1) {
|
||||
status = waitForCompletionWithTimeout(getTagAddress(), enableTimeout, waitTimeout, taskCountToWait, partitionCount, offsetSize);
|
||||
} else {
|
||||
status = waitForCompletionWithTimeout(enableTimeout, waitTimeout, taskCountToWait);
|
||||
}
|
||||
if (!status) {
|
||||
waitForFlushStamp(flushStampToWait);
|
||||
waitForFlushStamp(flushStampToWait, partitionCount, offsetSize);
|
||||
//now call blocking wait, this is to ensure that task count is reached
|
||||
waitForCompletionWithTimeout(false, 0, taskCountToWait);
|
||||
if (partitionCount > 1) {
|
||||
status = waitForCompletionWithTimeout(getTagAddress(), false, 0, taskCountToWait, partitionCount, offsetSize);
|
||||
} else {
|
||||
status = waitForCompletionWithTimeout(false, 0, taskCountToWait);
|
||||
}
|
||||
}
|
||||
UNRECOVERABLE_IF(*getTagAddress() < taskCountToWait);
|
||||
|
||||
@@ -1116,7 +1125,7 @@ uint32_t CommandStreamReceiverHw<GfxFamily>::blitBuffer(const BlitPropertiesCont
|
||||
|
||||
lock.unlock();
|
||||
if (blocking) {
|
||||
waitForTaskCountWithKmdNotifyFallback(newTaskCount, flushStampToWait, false, false);
|
||||
waitForTaskCountWithKmdNotifyFallback(newTaskCount, flushStampToWait, false, false, 1, 0);
|
||||
internalAllocationStorage->cleanAllocationList(newTaskCount, TEMPORARY_ALLOCATION);
|
||||
}
|
||||
|
||||
|
||||
@@ -44,7 +44,7 @@ class TbxCommandStreamReceiverHw : public CommandStreamReceiverSimulatedHw<GfxFa
|
||||
|
||||
bool flush(BatchBuffer &batchBuffer, ResidencyContainer &allocationsForResidency) override;
|
||||
|
||||
void waitForTaskCountWithKmdNotifyFallback(uint32_t taskCountToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep, bool forcePowerSavingMode) override;
|
||||
void waitForTaskCountWithKmdNotifyFallback(uint32_t taskCountToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep, bool forcePowerSavingMode, uint32_t partitionCount, uint32_t offsetSize) override;
|
||||
bool waitForCompletionWithTimeout(bool enableTimeout, int64_t timeoutMicroseconds, uint32_t taskCountToWait) override;
|
||||
void downloadAllocations() override;
|
||||
|
||||
|
||||
@@ -481,9 +481,9 @@ void TbxCommandStreamReceiverHw<GfxFamily>::flushSubmissionsAndDownloadAllocatio
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
void TbxCommandStreamReceiverHw<GfxFamily>::waitForTaskCountWithKmdNotifyFallback(uint32_t taskCountToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep, bool forcePowerSavingMode) {
|
||||
void TbxCommandStreamReceiverHw<GfxFamily>::waitForTaskCountWithKmdNotifyFallback(uint32_t taskCountToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep, bool forcePowerSavingMode, uint32_t partitionCount, uint32_t offsetSize) {
|
||||
flushSubmissionsAndDownloadAllocations();
|
||||
BaseClass::waitForTaskCountWithKmdNotifyFallback(taskCountToWait, flushStampToWait, useQuickKmdSleep, forcePowerSavingMode);
|
||||
BaseClass::waitForTaskCountWithKmdNotifyFallback(taskCountToWait, flushStampToWait, useQuickKmdSleep, forcePowerSavingMode, partitionCount, offsetSize);
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
|
||||
Reference in New Issue
Block a user