Wait on all tiles when clearing allocation list

Related-To: NEO-6244

Signed-off-by: Zbigniew Zdanowicz <zbigniew.zdanowicz@intel.com>
This commit is contained in:
Zbigniew Zdanowicz
2021-10-07 18:09:36 +00:00
committed by Compute-Runtime-Automation
parent cdb7287816
commit a51b385e80
6 changed files with 94 additions and 20 deletions

View File

@@ -141,10 +141,9 @@ void CommandStreamReceiver::makeResidentHostPtrAllocation(GraphicsAllocation *gf
}
// Waits for the tag value to reach requiredTaskCount (only when a tag address is
// available), then asks internalAllocationStorage to clean the allocation list
// for the given allocationUsage.
//
// NOTE(review): this function body is taken from a diff hunk whose +/- markers
// were lost. The two `auto address` declarations and the two wait forms look like
// the pre-commit and post-commit versions of the same statements rather than
// sequential code — confirm against the repository before treating this as source.
void CommandStreamReceiver::waitForTaskCountAndCleanAllocationList(uint32_t requiredTaskCount, uint32_t allocationUsage) {
auto address = tagAddress; // presumably the removed (pre-commit) line: reads the member directly
auto address = getTagAddress(); // presumably the added (post-commit) line: goes through the accessor
if (address) {
// presumably removed: spins on the single value at `address` with no timeout
while (*address < requiredTaskCount)
;
// presumably added: delegates to baseWaitFunction with enableTimeout=false and
// timeoutMicroseconds=0; that function iterates over activePartitions
baseWaitFunction(address, false, 0, requiredTaskCount);
}
internalAllocationStorage->cleanAllocationList(requiredTaskCount, allocationUsage);
}
@@ -258,9 +257,6 @@ void CommandStreamReceiver::cleanupResources() {
}
bool CommandStreamReceiver::waitForCompletionWithTimeout(bool enableTimeout, int64_t timeoutMicroseconds, uint32_t taskCountToWait) {
std::chrono::high_resolution_clock::time_point time1, time2;
int64_t timeDiff = 0;
if (this->latestSentTaskCount < taskCountToWait) {
this->flushTagUpdate();
}
@@ -272,7 +268,14 @@ bool CommandStreamReceiver::waitForCompletionWithTimeout(bool enableTimeout, int
}
}
volatile uint32_t *partitionAddress = getTagAddress();
return baseWaitFunction(getTagAddress(), enableTimeout, timeoutMicroseconds, taskCountToWait);
}
bool CommandStreamReceiver::baseWaitFunction(volatile uint32_t *pollAddress, bool enableTimeout, int64_t timeoutMicroseconds, uint32_t taskCountToWait) {
std::chrono::high_resolution_clock::time_point time1, time2;
int64_t timeDiff = 0;
volatile uint32_t *partitionAddress = pollAddress;
time1 = std::chrono::high_resolution_clock::now();
for (uint32_t i = 0; i < activePartitions; i++) {
while (*partitionAddress < taskCountToWait && timeDiff <= timeoutMicroseconds) {
@@ -289,7 +292,7 @@ bool CommandStreamReceiver::waitForCompletionWithTimeout(bool enableTimeout, int
partitionAddress = ptrOffset(partitionAddress, CommonConstants::partitionAddressOffset);
}
partitionAddress = getTagAddress();
partitionAddress = pollAddress;
for (uint32_t i = 0; i < activePartitions; i++) {
if (*partitionAddress < taskCountToWait) {
return false;
@@ -529,7 +532,13 @@ bool CommandStreamReceiver::initializeTagAllocation() {
}
this->setTagAllocation(tagAllocation);
*this->tagAddress = DebugManager.flags.EnableNullHardware.get() ? -1 : initialHardwareTag;
auto initValue = DebugManager.flags.EnableNullHardware.get() ? static_cast<uint32_t>(-1) : initialHardwareTag;
auto tagAddress = this->tagAddress;
uint32_t subDevices = static_cast<uint32_t>(this->deviceBitfield.count());
for (uint32_t i = 0; i < subDevices; i++) {
*tagAddress = initValue;
tagAddress = ptrOffset(tagAddress, CommonConstants::partitionAddressOffset);
}
*this->debugPauseStateAddress = DebugManager.flags.EnableNullHardware.get() ? DebugPauseState::disabled : DebugPauseState::waitingForFirstSemaphore;
PRINT_DEBUG_STRING(DebugManager.flags.PrintTagAllocationAddress.get(), stdout,

View File

@@ -159,6 +159,7 @@ class CommandStreamReceiver {
virtual void waitForTaskCountWithKmdNotifyFallback(uint32_t taskCountToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep, bool forcePowerSavingMode) = 0;
virtual bool waitForCompletionWithTimeout(bool enableTimeout, int64_t timeoutMicroseconds, uint32_t taskCountToWait);
MOCKABLE_VIRTUAL bool baseWaitFunction(volatile uint32_t *pollAddress, bool enableTimeout, int64_t timeoutMicroseconds, uint32_t taskCountToWait);
virtual void downloadAllocations(){};
void setSamplerCacheFlushRequired(SamplerCacheFlushState value) { this->samplerCacheFlushRequired = value; }