performance: add time measurement between make resident and wait on gpu

Related-To: NEO-8211

Signed-off-by: Zbigniew Zdanowicz <zbigniew.zdanowicz@intel.com>
This commit is contained in:
Zbigniew Zdanowicz
2023-07-26 19:10:25 +00:00
committed by Compute-Runtime-Automation
parent 8778a0acca
commit 0d8523c386
6 changed files with 88 additions and 7 deletions

View File

@@ -1052,6 +1052,8 @@ unsigned int Wddm::getEnablePreemptionRegValue() {
}
bool Wddm::waitOnGPU(D3DKMT_HANDLE context) {
perfLogStartWaitTime(residencyLogger.get(), currentPagingFenceValue);
D3DKMT_WAITFORSYNCHRONIZATIONOBJECTFROMGPU waitOnGpu = {};
waitOnGpu.hContext = context;
@@ -1062,6 +1064,7 @@ bool Wddm::waitOnGPU(D3DKMT_HANDLE context) {
waitOnGpu.MonitoredFenceValueArray = &localPagingFenceValue;
NTSTATUS status = getGdi()->waitForSynchronizationObjectFromGpu(&waitOnGpu);
perfLogResidencyWaitPagingeFenceLog(residencyLogger.get(), *getPagingFenceAddress(), true);
return status == STATUS_SUCCESS;
}
@@ -1203,7 +1206,7 @@ void Wddm::waitOnPagingFenceFromCpu() {
while (currentPagingFenceValue > *getPagingFenceAddress())
perfLogResidencyEnteredWait(residencyLogger.get());
perfLogResidencyWaitPagingeFenceLog(residencyLogger.get(), *getPagingFenceAddress());
perfLogResidencyWaitPagingeFenceLog(residencyLogger.get(), *getPagingFenceAddress(), false);
}
void Wddm::updatePagingFenceValue(uint64_t newPagingFenceValue) {

View File

@@ -59,7 +59,7 @@ class WddmResidencyLogger {
enterWait = true;
}
void waitPagingeFenceLog(UINT64 stopWaitPagingFence) {
void waitPagingeFenceLog(UINT64 stopWaitPagingFence, bool gpuWait) {
endTime = std::chrono::high_resolution_clock::now();
int64_t timeDiff = 0;
@@ -75,16 +75,23 @@ class WddmResidencyLogger {
timeDiff);
timeDiff = std::chrono::duration_cast<std::chrono::microseconds>(endTime - waitStartTime).count();
IoFunctions::fprintf(pagingLog, "waiting: %x delta time wait loop: %lld\n", enterWait, timeDiff);
IoFunctions::fprintf(pagingLog, "waiting: %x delta time wait loop: %lld wait on GPU: %d\n", enterWait, timeDiff, gpuWait);
if (trimBudgetTime != std::chrono::high_resolution_clock::time_point::max()) {
timeDiff = std::chrono::duration_cast<std::chrono::microseconds>(endTime - trimBudgetTime).count();
IoFunctions::fprintf(pagingLog, "waiting delta time trim to budget: %lld\n", timeDiff);
}
makeResidentCall = false;
enterWait = false;
makeResidentPagingFence = 0;
startWaitPagingFence = 0;
trimBudgetTime = std::chrono::high_resolution_clock::time_point::max();
}
// Records that a trim is required: writes the number of bytes to trim to pagingLog
// and stamps trimBudgetTime with the current time. waitPagingeFenceLog reports the
// elapsed time since this stamp (when it is not time_point::max()) and then resets
// it back to time_point::max().
void trimRequired(UINT64 numBytesToTrim) {
IoFunctions::fprintf(pagingLog, "trimming required: bytes to trim: %llu\n", numBytesToTrim);
// Start of the "trim to budget" interval measured by waitPagingeFenceLog.
trimBudgetTime = std::chrono::high_resolution_clock::now();
}
void variadicLog(char const *const formatStr, va_list arg) {
@@ -109,6 +116,7 @@ class WddmResidencyLogger {
std::chrono::high_resolution_clock::time_point pendingTime;
std::chrono::high_resolution_clock::time_point waitStartTime;
std::chrono::high_resolution_clock::time_point endTime;
std::chrono::high_resolution_clock::time_point trimBudgetTime = std::chrono::high_resolution_clock::time_point::max();
UINT64 makeResidentPagingFence = 0ull;
UINT64 startWaitPagingFence = 0ull;
@@ -152,10 +160,10 @@ inline void perfLogResidencyEnteredWait(WddmResidencyLogger *log) {
}
}
// Guarded forwarder to WddmResidencyLogger::waitPagingeFenceLog: the call is made only
// when wddmResidencyLoggingAvailable is true at compile time and log is non-null.
// stopWaitPagingFence is passed through unchanged; in the updated signature gpuWait is
// passed through as well (callers in this change pass true from Wddm::waitOnGPU and
// false from Wddm::waitOnPagingFenceFromCpu).
inline void perfLogResidencyWaitPagingeFenceLog(WddmResidencyLogger *log, UINT64 stopWaitPagingFence) {
inline void perfLogResidencyWaitPagingeFenceLog(WddmResidencyLogger *log, UINT64 stopWaitPagingFence, bool gpuWait) {
if constexpr (wddmResidencyLoggingAvailable) {
// A null logger means no logging is performed.
if (log) {
log->waitPagingeFenceLog(stopWaitPagingFence);
log->waitPagingeFenceLog(stopWaitPagingFence, gpuWait);
}
}
}