diff --git a/CMakeLists.txt b/CMakeLists.txt index 88e2ca164a..60e14b97d2 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -579,11 +579,6 @@ else() set(NEO_TESTS_LISTENER_OPTION "--enable_default_listener") endif() -# Put profiling enable flag into define -if(KMD_PROFILING) - add_definitions(-DKMD_PROFILING=${KMD_PROFILING}) -endif() - if(MSVC) # Force to treat warnings as errors if(NOT CMAKE_CXX_FLAGS MATCHES "/WX") diff --git a/opencl/source/api/api_enter.h b/opencl/source/api/api_enter.h index 5e0bfbf2e0..625843b402 100644 --- a/opencl/source/api/api_enter.h +++ b/opencl/source/api/api_enter.h @@ -12,10 +12,3 @@ #define API_ENTER(retValPointer) \ LoggerApiEnterWrapper::enabled()> ApiWrapperForSingleCall(__FUNCTION__, retValPointer) - -#if KMD_PROFILING == 1 -#undef API_ENTER - -#define API_ENTER(x) \ - PerfProfilerApiWrapper globalPerfProfilersWrapperInstanceForSingleApiFunction(__FUNCTION__) -#endif diff --git a/opencl/test/unit_test/linux/main_linux_dll.cpp b/opencl/test/unit_test/linux/main_linux_dll.cpp index b45186661d..9e44ed1e00 100644 --- a/opencl/test/unit_test/linux/main_linux_dll.cpp +++ b/opencl/test/unit_test/linux/main_linux_dll.cpp @@ -234,7 +234,7 @@ TEST_F(DrmSimpleTests, givenPrintIoctlTimesWhenCallIoctlThenStatisticsAreGathere auto drm = DrmWrap::createDrm(*(mockExecutionEnvironment.rootDeviceEnvironments[0].get())); DebugManagerStateRestore restorer; - debugManager.flags.PrintIoctlTimes.set(true); + debugManager.flags.PrintKmdTimes.set(true); VariableBackup backupForceExtraIoctlDuration(&forceExtraIoctlDuration, true); EXPECT_TRUE(drm->ioctlStatistics.empty()); diff --git a/shared/source/debug_settings/debug_variables_base.inl b/shared/source/debug_settings/debug_variables_base.inl index 3e7f71d1ff..8d15c962f1 100644 --- a/shared/source/debug_settings/debug_variables_base.inl +++ b/shared/source/debug_settings/debug_variables_base.inl @@ -311,7 +311,7 @@ DECLARE_DEBUG_VARIABLE(bool, PrintBOPrefetchingResult, false, "tracks the result 
DECLARE_DEBUG_VARIABLE(bool, PrintTagAllocationAddress, false, "Print tag allocation address for each engine") DECLARE_DEBUG_VARIABLE(bool, ProvideVerboseImplicitFlush, false, "provides verbose messages about implicit flush mechanism") DECLARE_DEBUG_VARIABLE(bool, PrintBlitDispatchDetails, false, "Print blit dispatch details") -DECLARE_DEBUG_VARIABLE(bool, PrintIoctlTimes, false, "Print ioctl times") +DECLARE_DEBUG_VARIABLE(bool, PrintKmdTimes, false, "Print ioctl times") DECLARE_DEBUG_VARIABLE(bool, PrintIoctlEntries, false, "Print ioctl being called") DECLARE_DEBUG_VARIABLE(bool, PrintUmdSharedMigration, false, "Print log message when shared allocation is being migrated by UMD") DECLARE_DEBUG_VARIABLE(bool, PrintImageBlitBlockCopyCmdDetails, false, "Prints XY_BLOCK_COPY_BLT command details") diff --git a/shared/source/helpers/options.h b/shared/source/helpers/options.h index 74a1228263..cdf9203647 100644 --- a/shared/source/helpers/options.h +++ b/shared/source/helpers/options.h @@ -8,10 +8,6 @@ #pragma once #include -#ifndef KMD_PROFILING -#define KMD_PROFILING 0 -#endif - namespace NEO { enum CommandStreamReceiverType { // Use receiver for real HW diff --git a/shared/source/os_interface/linux/drm_neo.cpp b/shared/source/os_interface/linux/drm_neo.cpp index cfba660920..d4c75b7545 100644 --- a/shared/source/os_interface/linux/drm_neo.cpp +++ b/shared/source/os_interface/linux/drm_neo.cpp @@ -88,7 +88,7 @@ int Drm::ioctl(DrmIoctl request, void *arg) { int returnedErrno = 0; SYSTEM_ENTER(); do { - auto measureTime = debugManager.flags.PrintIoctlTimes.get(); + auto measureTime = debugManager.flags.PrintKmdTimes.get(); std::chrono::steady_clock::time_point start; std::chrono::steady_clock::time_point end; @@ -644,7 +644,7 @@ std::vector Drm::query(uint32_t queryId, uint32_t queryItemFlags) { } void Drm::printIoctlStatistics() { - if (!debugManager.flags.PrintIoctlTimes.get()) { + if (!debugManager.flags.PrintKmdTimes.get()) { return; } diff --git 
a/shared/source/os_interface/windows/CMakeLists.txt b/shared/source/os_interface/windows/CMakeLists.txt index eb86b30d19..aa03921187 100644 --- a/shared/source/os_interface/windows/CMakeLists.txt +++ b/shared/source/os_interface/windows/CMakeLists.txt @@ -68,6 +68,7 @@ set(NEO_CORE_OS_INTERFACE_WDDM ${CMAKE_CURRENT_SOURCE_DIR}/gdi_interface.h ${CMAKE_CURRENT_SOURCE_DIR}/gdi_interface_logging.cpp ${CMAKE_CURRENT_SOURCE_DIR}/gdi_interface_logging.h + ${CMAKE_CURRENT_SOURCE_DIR}/gdi_profiling.h ${CMAKE_CURRENT_SOURCE_DIR}/hw_device_id_win.cpp ${CMAKE_CURRENT_SOURCE_DIR}/hw_device_id.h ${CMAKE_CURRENT_SOURCE_DIR}/product_helper_wddm.cpp diff --git a/shared/source/os_interface/windows/gdi_interface.cpp b/shared/source/os_interface/windows/gdi_interface.cpp index 2d616a72c1..b31933920b 100644 --- a/shared/source/os_interface/windows/gdi_interface.cpp +++ b/shared/source/os_interface/windows/gdi_interface.cpp @@ -25,6 +25,7 @@ Gdi::~Gdi() { if constexpr (GdiLogging::gdiLoggingSupport) { GdiLogging::close(); } + this->profiler.printGdiTimes(); } bool Gdi::setupHwQueueProcAddresses() { diff --git a/shared/source/os_interface/windows/gdi_interface.h b/shared/source/os_interface/windows/gdi_interface.h index 98faf301ef..ff1fa3f365 100644 --- a/shared/source/os_interface/windows/gdi_interface.h +++ b/shared/source/os_interface/windows/gdi_interface.h @@ -8,67 +8,73 @@ #pragma once #include "shared/source/os_interface/os_library.h" #include "shared/source/os_interface/windows/d3dkmthk_wrapper.h" +#include "shared/source/os_interface/windows/gdi_profiling.h" #include "shared/source/os_interface/windows/thk_wrapper.h" #include namespace NEO { +#define DEFINE_THK_WRAPPER(TYPE, VAR) ThkWrapper VAR = ThkWrapper(this->profiler, #TYPE, this->gdiId++); + class Gdi { + uint32_t gdiId = 0; + GdiProfiler profiler; + public: Gdi(); MOCKABLE_VIRTUAL ~Gdi(); - ThkWrapper openAdapterFromLuid{}; - ThkWrapper createAllocation{}; - ThkWrapper createAllocation2{}; + DEFINE_THK_WRAPPER(IN OUT 
CONST D3DKMT_OPENADAPTERFROMLUID *, openAdapterFromLuid); + DEFINE_THK_WRAPPER(IN OUT D3DKMT_CREATEALLOCATION *, createAllocation); + DEFINE_THK_WRAPPER(IN OUT D3DKMT_CREATEALLOCATION *, createAllocation2); NTSTATUS(APIENTRY *shareObjects) (UINT cObjects, const D3DKMT_HANDLE *hObjects, POBJECT_ATTRIBUTES pObjectAttributes, DWORD dwDesiredAccess, HANDLE *phSharedNtHandle) = {}; - ThkWrapper destroyAllocation{}; - ThkWrapper destroyAllocation2{}; - ThkWrapper queryAdapterInfo{}; - ThkWrapper closeAdapter{}; - ThkWrapper createDevice{}; - ThkWrapper destroyDevice{}; - ThkWrapper escape{}; - ThkWrapper createContext{}; - ThkWrapper destroyContext{}; - ThkWrapper openResource{}; - ThkWrapper openResourceFromNtHandle{}; - ThkWrapper queryResourceInfo{}; - ThkWrapper queryResourceInfoFromNtHandle{}; - ThkWrapper createSynchronizationObject{}; - ThkWrapper createSynchronizationObject2{}; - ThkWrapper destroySynchronizationObject{}; - ThkWrapper signalSynchronizationObject{}; - ThkWrapper waitForSynchronizationObject{}; - ThkWrapper waitForSynchronizationObjectFromCpu{}; - ThkWrapper signalSynchronizationObjectFromCpu{}; - ThkWrapper waitForSynchronizationObjectFromGpu{}; - ThkWrapper signalSynchronizationObjectFromGpu{}; - ThkWrapper createPagingQueue{}; - ThkWrapper destroyPagingQueue{}; - ThkWrapper lock2{}; - ThkWrapper unlock2{}; - ThkWrapper mapGpuVirtualAddress{}; - ThkWrapper reserveGpuVirtualAddress{}; - ThkWrapper freeGpuVirtualAddress{}; - ThkWrapper updateGpuVirtualAddress{}; - ThkWrapper submitCommand{}; - ThkWrapper makeResident{}; - ThkWrapper evict{}; - ThkWrapper registerTrimNotification{}; - ThkWrapper unregisterTrimNotification{}; - ThkWrapper setAllocationPriority{}; - ThkWrapper setSchedulingPriority{}; + DEFINE_THK_WRAPPER(IN CONST D3DKMT_DESTROYALLOCATION *, destroyAllocation); + DEFINE_THK_WRAPPER(IN CONST D3DKMT_DESTROYALLOCATION2 *, destroyAllocation2); + DEFINE_THK_WRAPPER(IN CONST D3DKMT_QUERYADAPTERINFO *, queryAdapterInfo); + 
DEFINE_THK_WRAPPER(IN CONST D3DKMT_CLOSEADAPTER *, closeAdapter); + DEFINE_THK_WRAPPER(IN OUT D3DKMT_CREATEDEVICE *, createDevice); + DEFINE_THK_WRAPPER(IN CONST D3DKMT_DESTROYDEVICE *, destroyDevice); + DEFINE_THK_WRAPPER(IN CONST D3DKMT_ESCAPE *, escape); + DEFINE_THK_WRAPPER(IN D3DKMT_CREATECONTEXTVIRTUAL *, createContext); + DEFINE_THK_WRAPPER(IN CONST D3DKMT_DESTROYCONTEXT *, destroyContext); + DEFINE_THK_WRAPPER(IN OUT D3DKMT_OPENRESOURCE *, openResource); + DEFINE_THK_WRAPPER(IN OUT D3DKMT_OPENRESOURCEFROMNTHANDLE *, openResourceFromNtHandle); + DEFINE_THK_WRAPPER(IN OUT D3DKMT_QUERYRESOURCEINFO *, queryResourceInfo); + DEFINE_THK_WRAPPER(IN OUT D3DKMT_QUERYRESOURCEINFOFROMNTHANDLE *, queryResourceInfoFromNtHandle); + DEFINE_THK_WRAPPER(IN OUT D3DKMT_CREATESYNCHRONIZATIONOBJECT *, createSynchronizationObject); + DEFINE_THK_WRAPPER(IN OUT D3DKMT_CREATESYNCHRONIZATIONOBJECT2 *, createSynchronizationObject2); + DEFINE_THK_WRAPPER(IN CONST D3DKMT_DESTROYSYNCHRONIZATIONOBJECT *, destroySynchronizationObject); + DEFINE_THK_WRAPPER(IN CONST D3DKMT_SIGNALSYNCHRONIZATIONOBJECT *, signalSynchronizationObject); + DEFINE_THK_WRAPPER(IN CONST_FROM_WDK_10_0_18328_0 D3DKMT_WAITFORSYNCHRONIZATIONOBJECT *, waitForSynchronizationObject); + DEFINE_THK_WRAPPER(IN CONST D3DKMT_WAITFORSYNCHRONIZATIONOBJECTFROMCPU *, waitForSynchronizationObjectFromCpu); + DEFINE_THK_WRAPPER(IN CONST D3DKMT_SIGNALSYNCHRONIZATIONOBJECTFROMCPU *, signalSynchronizationObjectFromCpu); + DEFINE_THK_WRAPPER(IN CONST D3DKMT_WAITFORSYNCHRONIZATIONOBJECTFROMGPU *, waitForSynchronizationObjectFromGpu); + DEFINE_THK_WRAPPER(IN CONST D3DKMT_SIGNALSYNCHRONIZATIONOBJECTFROMGPU *, signalSynchronizationObjectFromGpu); + DEFINE_THK_WRAPPER(IN OUT D3DKMT_CREATEPAGINGQUEUE *, createPagingQueue); + DEFINE_THK_WRAPPER(IN OUT D3DDDI_DESTROYPAGINGQUEUE *, destroyPagingQueue); + DEFINE_THK_WRAPPER(IN OUT D3DKMT_LOCK2 *, lock2); + DEFINE_THK_WRAPPER(IN CONST D3DKMT_UNLOCK2 *, unlock2); + DEFINE_THK_WRAPPER(IN OUT 
D3DDDI_MAPGPUVIRTUALADDRESS *, mapGpuVirtualAddress); + DEFINE_THK_WRAPPER(IN OUT D3DDDI_RESERVEGPUVIRTUALADDRESS *, reserveGpuVirtualAddress); + DEFINE_THK_WRAPPER(IN CONST D3DKMT_FREEGPUVIRTUALADDRESS *, freeGpuVirtualAddress); + DEFINE_THK_WRAPPER(IN CONST D3DKMT_UPDATEGPUVIRTUALADDRESS *, updateGpuVirtualAddress); + DEFINE_THK_WRAPPER(IN CONST D3DKMT_SUBMITCOMMAND *, submitCommand); + DEFINE_THK_WRAPPER(IN OUT D3DDDI_MAKERESIDENT *, makeResident); + DEFINE_THK_WRAPPER(IN D3DKMT_EVICT *, evict); + DEFINE_THK_WRAPPER(IN D3DKMT_REGISTERTRIMNOTIFICATION *, registerTrimNotification); + DEFINE_THK_WRAPPER(IN D3DKMT_UNREGISTERTRIMNOTIFICATION *, unregisterTrimNotification); + DEFINE_THK_WRAPPER(IN CONST D3DKMT_SETALLOCATIONPRIORITY *, setAllocationPriority); + DEFINE_THK_WRAPPER(IN CONST D3DKMT_SETCONTEXTSCHEDULINGPRIORITY *, setSchedulingPriority); // HW queue - ThkWrapper createHwQueue{}; - ThkWrapper destroyHwQueue{}; - ThkWrapper submitCommandToHwQueue{}; + DEFINE_THK_WRAPPER(IN OUT D3DKMT_CREATEHWQUEUE *, createHwQueue); + DEFINE_THK_WRAPPER(IN CONST D3DKMT_DESTROYHWQUEUE *, destroyHwQueue); + DEFINE_THK_WRAPPER(IN CONST D3DKMT_SUBMITCOMMANDTOHWQUEUE *, submitCommandToHwQueue); // For debug purposes - ThkWrapper getDeviceState{}; + DEFINE_THK_WRAPPER(IN OUT D3DKMT_GETDEVICESTATE *, getDeviceState); bool isInitialized() { return initialized; @@ -79,7 +85,7 @@ class Gdi { protected: OsLibrary *createGdiDLL(); MOCKABLE_VIRTUAL bool getAllProcAddresses(); - std::unique_ptr gdiDll; + std::unique_ptr gdiDll = nullptr; bool initialized = false; }; } // namespace NEO diff --git a/shared/source/os_interface/windows/gdi_profiling.h b/shared/source/os_interface/windows/gdi_profiling.h new file mode 100644 index 0000000000..8b59444e3c --- /dev/null +++ b/shared/source/os_interface/windows/gdi_profiling.h @@ -0,0 +1,75 @@ +/* + * Copyright (C) 2018-2024 Intel Corporation + * + * SPDX-License-Identifier: MIT + * + */ + +#pragma once + +#include +#include +#include + +namespace 
namespace NEO {

// Accumulates per-call timing statistics for GDI thunk calls and prints them
// as a table on request.
//
// Entries are stored in a vector indexed by the caller-supplied id, so ids are
// expected to be small and dense. The class performs no locking of its own.
class GdiProfiler {

    // Running statistics for a single GDI call id.
    struct StatisticsEntry {
        long long totalTime = 0;
        uint64_t count = 0;
        long long minTime = std::numeric_limits<long long>::max();
        long long maxTime = 0;
        const char *gdiCall = nullptr; // not owned; only the pointer is stored

        // Length of the call name, 0 when no name has been recorded.
        size_t getLength() const {
            return this->gdiCall ? strlen(this->gdiCall) : 0u;
        }

        // Printable call name; never null so it is safe to pass to "%s".
        const char *getName() const {
            return this->gdiCall ? this->gdiCall : "";
        }
    };

  public:
    // Prints one row per id that was recorded at least once.
    // Does nothing when no call has been recorded at all.
    void printGdiTimes() {
        if (this->gdiStatistics.empty()) {
            return;
        }

        // Width of the name column is the longest recorded name. getLength()
        // is used instead of strlen() because an entry may carry a null name.
        auto longestEntry = std::max_element(this->gdiStatistics.begin(), this->gdiStatistics.end(),
                                             [](const auto &lhs, const auto &rhs) {
                                                 return lhs.getLength() < rhs.getLength();
                                             });
        const auto nameWidth = static_cast<int>(longestEntry->getLength());

        printf("\n--- Gdi statistics ---\n");
        // The newline lives in the format string so that "Max" is padded to
        // the same width as the values printed below it.
        printf("%*s %15s %10s %25s %15s %15s\n",
               nameWidth, "Request", "Total time(ns)", "Count", "Avg time per gdi call", "Min", "Max");

        for (const auto &entry : this->gdiStatistics) {
            if (entry.count == 0) {
                continue; // id slot created by resize() but never recorded
            }
            // Signed fields use %lld; the 64-bit count uses %llu so it is not
            // truncated where unsigned long is 32 bits wide.
            printf("%*s %15lld %10llu %25f %15lld %15lld\n",
                   nameWidth,
                   entry.getName(),
                   entry.totalTime,
                   static_cast<unsigned long long>(entry.count),
                   entry.totalTime / static_cast<double>(entry.count),
                   entry.minTime,
                   entry.maxTime);
        }
        printf("\n");
    }

    // Adds one measurement to the entry selected by id.
    //   elapsedTime - duration of the call; it is reported under the
    //                 "Total time(ns)" column, so nanoseconds are expected.
    //   name        - label printed for this id; the pointer is stored, not
    //                 copied, so it must stay valid until printGdiTimes().
    //   id          - index of the entry; the storage grows to fit it.
    void recordElapsedTime(long long elapsedTime, const char *name, uint32_t id) {
        // Widen before adding 1 so that the largest uint32_t id cannot wrap to 0.
        const size_t index = id;
        if (this->gdiStatistics.size() <= index) {
            this->gdiStatistics.resize(index + 1u);
        }

        auto &entry = this->gdiStatistics[index];

        entry.gdiCall = name;
        entry.totalTime += elapsedTime;
        entry.count++;
        entry.minTime = std::min(entry.minTime, elapsedTime);
        entry.maxTime = std::max(entry.maxTime, elapsedTime);
    }

  protected:
    std::vector<StatisticsEntry> gdiStatistics{};
};
} // namespace NEO \ No newline at end of file diff --git a/shared/source/os_interface/windows/thk_wrapper.h b/shared/source/os_interface/windows/thk_wrapper.h index a886ce8c4c..89dac38127 100644 --- a/shared/source/os_interface/windows/thk_wrapper.h +++ b/shared/source/os_interface/windows/thk_wrapper.h @@ -6,94 +6,56 @@ */
#pragma once +#include "shared/source/debug_settings/debug_settings_manager.h" #include "shared/source/helpers/options.h" #include "shared/source/os_interface/windows/d3dkmthk_wrapper.h" #include "shared/source/os_interface/windows/gdi_interface_logging.h" +#include "shared/source/os_interface/windows/gdi_profiling.h" #include "shared/source/os_interface/windows/windows_wrapper.h" #include "shared/source/utilities/api_intercept.h" +#include +#include +#include + namespace NEO { -// Default template for GetID( ) for Thk function, causing compilation error !! -// Returns ID for specific ThkWrapper type -template -constexpr unsigned int getThkWrapperId() { - static_assert(sizeof(Param) > sizeof(Param) + 1, "Template specialization for GetID is required for each new THKWrapper"); - return 0; -} - -// Template specializations of GetID(), required for every new Thk function -#define GET_ID(TYPE, VALUE) \ - template <> \ - constexpr unsigned int getThkWrapperId() { \ - return 0; \ - } - -GET_ID(CONST D3DKMT_OPENADAPTERFROMLUID *, SYSTIMER_ID_OPENADAPTERFROMLUID) -GET_ID(CONST D3DKMT_CLOSEADAPTER *, SYSTIMER_ID_CLOSEADAPTER) -GET_ID(CONST D3DKMT_QUERYADAPTERINFO *, SYSTIMER_ID_QUERYADAPTERINFO) -GET_ID(CONST D3DKMT_ESCAPE *, SYSTIMER_ID_ESCAPE) -GET_ID(D3DKMT_CREATEDEVICE *, SYSTIMER_ID_CREATEDEVICE) -GET_ID(CONST D3DKMT_DESTROYDEVICE *, SYSTIMER_ID_DESTROYDEVICE) -GET_ID(D3DKMT_CREATECONTEXT *, SYSTIMER_ID_CREATECONTEXT) -GET_ID(CONST D3DKMT_DESTROYCONTEXT *, SYSTIMER_ID_DESTROYCONTEXT) -GET_ID(D3DKMT_CREATEALLOCATION *, SYSTIMER_ID_CREATEALLOCATION) -GET_ID(D3DKMT_OPENRESOURCE *, SYSTIMER_ID_OPENRESOURCE) -GET_ID(D3DKMT_QUERYRESOURCEINFO *, SYSTIMER_ID_QUERYRESOURCEINFO) -GET_ID(D3DKMT_CREATESYNCHRONIZATIONOBJECT *, SYSTIMER_ID_CREATESYNCHRONIZATIONOBJECT) -GET_ID(CONST D3DKMT_DESTROYSYNCHRONIZATIONOBJECT *, SYSTIMER_ID_DESTROYSYNCHRONIZATIONOBJECT) -GET_ID(CONST D3DKMT_SIGNALSYNCHRONIZATIONOBJECT *, SYSTIMER_ID_SIGNALSYNCHRONIZATIONOBJECT) 
-GET_ID(CONST_FROM_WDK_10_0_18328_0 D3DKMT_WAITFORSYNCHRONIZATIONOBJECT *, SYSTIMER_ID_WAITFORSYNCHRONIZATIONOBJECT) -GET_ID(D3DKMT_CREATESYNCHRONIZATIONOBJECT2 *, SYSTIMER_ID_CREATESYNCHRONIZATIONOBJECT2) -GET_ID(D3DKMT_GETDEVICESTATE *, SYSTIMER_ID_GETDEVICESTATE) -GET_ID(D3DDDI_MAKERESIDENT *, SYSTIMER_ID_MAKERESIDENT) -GET_ID(D3DKMT_EVICT *, SYSTIMER_ID_EVICT) -GET_ID(CONST D3DKMT_WAITFORSYNCHRONIZATIONOBJECTFROMCPU *, SYSTIMER_ID_WAITFORSYNCHRONIZATIONOBJECTFROMCPU) -GET_ID(CONST D3DKMT_SIGNALSYNCHRONIZATIONOBJECTFROMCPU *, SYSTIMER_ID_SIGNALSYNCHRONIZATIONOBJECTFROMCPU) -GET_ID(CONST D3DKMT_WAITFORSYNCHRONIZATIONOBJECTFROMGPU *, SYSTIMER_ID_WAITFORSYNCHRONIZATIONOBJECTFROMGPU) -GET_ID(CONST D3DKMT_SIGNALSYNCHRONIZATIONOBJECTFROMGPU *, SYSTIMER_ID_SIGNALSYNCHRONIZATIONOBJECTFROMGPU) -GET_ID(D3DKMT_CREATEPAGINGQUEUE *, SYSTIMER_ID_CREATEPAGINGQUEUE) -GET_ID(D3DDDI_DESTROYPAGINGQUEUE *, SYSTIMER_ID_D3DDDI_DESTROYPAGINGQUEUE) -GET_ID(D3DKMT_LOCK2 *, SYSTIMER_ID_LOCK2) -GET_ID(CONST D3DKMT_UNLOCK2 *, SYSTIMER_ID_UNLOCK2) -GET_ID(CONST D3DKMT_INVALIDATECACHE *, SYSTIMER_ID_INVALIDATECACHE) -GET_ID(D3DDDI_MAPGPUVIRTUALADDRESS *, SYSTIMER_ID_D3DDDI_MAPGPUVIRTUALADDRESS) -GET_ID(D3DDDI_RESERVEGPUVIRTUALADDRESS *, SYSTIMER_ID_D3DDDI_RESERVEGPUVIRTUALADDRESS) -GET_ID(CONST D3DKMT_FREEGPUVIRTUALADDRESS *, SYSTIMER_ID_FREEGPUVIRTUALADDRESS) -GET_ID(CONST D3DKMT_UPDATEGPUVIRTUALADDRESS *, SYSTIMER_ID_UPDATEGPUVIRTUALADDRESS) -GET_ID(D3DKMT_CREATECONTEXTVIRTUAL *, SYSTIMER_ID_CREATECONTEXTVIRTUAL) -GET_ID(CONST D3DKMT_SUBMITCOMMAND *, SYSTIMER_ID_SUBMITCOMMAND) -GET_ID(D3DKMT_OPENSYNCOBJECTFROMNTHANDLE2 *, SYSTIMER_ID_OPENSYNCOBJECTFROMNTHANDLE2) -GET_ID(CONST D3DKMT_DESTROYALLOCATION2 *, SYSTIMER_ID_DESTROYALLOCATION2) -GET_ID(D3DKMT_REGISTERTRIMNOTIFICATION *, SYSTIMER_ID_REGISTERTRIMNOTIFICATION) -GET_ID(D3DKMT_UNREGISTERTRIMNOTIFICATION *, SYSTIMER_ID_UNREGISTERTRIMNOTIFICATION) -GET_ID(D3DKMT_OPENRESOURCEFROMNTHANDLE *, SYSTIMER_ID_OPENRESOURCEFROMNTHANDLE) 
-GET_ID(D3DKMT_QUERYRESOURCEINFOFROMNTHANDLE *, SYSTIMER_ID_QUERYRESOURCEINFOFROMNTHANDLE) -GET_ID(D3DKMT_CREATEHWQUEUE *, SYSTIMER_ID_CREATEHWQUEUE) -GET_ID(CONST D3DKMT_DESTROYHWQUEUE *, SYSTIMER_ID_DESTROYHWQUEUE) -GET_ID(CONST D3DKMT_SUBMITCOMMANDTOHWQUEUE *, SYSTIMER_ID_SUBMITCOMMANDTOHWQUEUE) -GET_ID(CONST D3DKMT_SETALLOCATIONPRIORITY *, SYSTIMER_ID_SETALLOCATIONPRIORITY) -GET_ID(CONST D3DKMT_SETCONTEXTSCHEDULINGPRIORITY *, SYSTIMER_ID_SETCONTEXTSCHEDULINGPRIORITY) template class ThkWrapper { typedef NTSTATUS(APIENTRY *Func)(Param); + GdiProfiler &profiler; + const std::string name{}; + const uint32_t id{}; public: + ThkWrapper(GdiProfiler &profiler, const char *name, uint32_t id) : profiler(profiler), name(name), id(id){}; + Func mFunc = nullptr; inline NTSTATUS operator()(Param param) const { - if (KMD_PROFILING) { - SYSTEM_ENTER() - NTSTATUS status; - status = mFunc(param); - SYSTEM_LEAVE(getId()); - return status; - } else if constexpr (GdiLogging::gdiLoggingSupport) { - NTSTATUS status; + if constexpr (GdiLogging::gdiLoggingSupport) { GdiLogging::logEnter(param); - status = mFunc(param); - GdiLogging::logExit(status, param); - return status; + + auto measureTime = debugManager.flags.PrintKmdTimes.get(); + std::chrono::steady_clock::time_point start; + std::chrono::steady_clock::time_point end; + + if (measureTime) { + start = std::chrono::steady_clock::now(); + } + + auto ret = mFunc(param); + + if (measureTime) { + end = std::chrono::steady_clock::now(); + long long elapsedTime = std::chrono::duration_cast(end - start).count(); + + profiler.recordElapsedTime(elapsedTime, this->name.c_str(), this->id); + } + + GdiLogging::logExit(ret, param); + + return ret; } else { return mFunc(param); } diff --git a/shared/source/utilities/api_intercept.h b/shared/source/utilities/api_intercept.h index 441a30b247..68b382af28 100644 --- a/shared/source/utilities/api_intercept.h +++ b/shared/source/utilities/api_intercept.h @@ -10,21 +10,3 @@ #define SYSTEM_LEAVE(id) 
#define WAIT_ENTER() #define WAIT_LEAVE() - -#if KMD_PROFILING == 1 -#undef SYSTEM_ENTER -#undef SYSTEM_LEAVE -#undef WAIT_ENTER -#undef WAIT_LEAVE - -#define SYSTEM_ENTER() \ - PerfProfiler::create(); \ - gPerfProfiler->systemEnter(); - -#define SYSTEM_LEAVE(id) \ - gPerfProfiler->systemLeave(id); -#define WAIT_ENTER() \ - SYSTEM_ENTER() -#define WAIT_LEAVE() \ - SYSTEM_LEAVE(0) -#endif diff --git a/shared/source/utilities/perf_profiler.h b/shared/source/utilities/perf_profiler.h index f56d0c678a..756883a689 100644 --- a/shared/source/utilities/perf_profiler.h +++ b/shared/source/utilities/perf_profiler.h @@ -95,23 +95,4 @@ class PerfProfiler { std::unique_ptr sysLogFile; std::vector systemLogs; }; - -#if KMD_PROFILING == 1 - -extern thread_local PerfProfiler *gPerfProfiler; - -struct PerfProfilerApiWrapper { - PerfProfilerApiWrapper(const char *funcName) - : funcName(funcName) { - PerfProfiler::create(); - gPerfProfiler->apiEnter(); - } - - ~PerfProfilerApiWrapper() { - gPerfProfiler->apiLeave(funcName); - } - - const char *funcName; -}; -#endif }; // namespace NEO diff --git a/shared/test/common/test_files/igdrcl.config b/shared/test/common/test_files/igdrcl.config index 4972e43f99..6abac282e2 100644 --- a/shared/test/common/test_files/igdrcl.config +++ b/shared/test/common/test_files/igdrcl.config @@ -260,7 +260,7 @@ EnableHostPointerImport = -1 EnableHostUsmSupport = -1 ForceBtpPrefetchMode = -1 OverrideProfilingTimerResolution = -1 -PrintIoctlTimes = 0 +PrintKmdTimes = 0 PrintIoctlEntries = 0 PrintUmdSharedMigration = 0 UpdateTaskCountFromWait = -1 diff --git a/shared/test/unit_test/os_interface/windows/gdi_interface_tests.cpp b/shared/test/unit_test/os_interface/windows/gdi_interface_tests.cpp index d31be7b85c..38f0c090d5 100644 --- a/shared/test/unit_test/os_interface/windows/gdi_interface_tests.cpp +++ b/shared/test/unit_test/os_interface/windows/gdi_interface_tests.cpp @@ -47,8 +47,34 @@ TEST(GdiInterface, 
givenGdiOverridePathWhenGdiInterfaceIsCalledThenOverridePathI Os::gdiDllName = oldName; } +TEST(GdiInterface, givenPrintKmdTimesWhenCallThkWrapperThenRecordTime) { + if (!GdiLogging::gdiLoggingSupport) { + GTEST_SKIP(); + } + + DebugManagerStateRestore dbgRestorer; + debugManager.flags.PrintKmdTimes.set(1); + + auto gdi = std::make_unique(); + EXPECT_TRUE(gdi->isInitialized()); + + testing::internal::CaptureStdout(); + + D3DKMT_OPENADAPTERFROMLUID param = {}; + gdi->openAdapterFromLuid(¶m); + gdi->openAdapterFromLuid(¶m); + D3DKMT_CLOSEADAPTER closeAdapter = {}; + closeAdapter.hAdapter = param.hAdapter; + gdi->closeAdapter(&closeAdapter); + + gdi.reset(); + auto output = testing::internal::GetCapturedStdout(); + EXPECT_TRUE(output.find("\n--- Gdi statistics ---\n") != std::string::npos); +} + TEST(ThkWrapperTest, givenThkWrapperWhenConstructedThenmFuncIsInitialized) { - NEO::ThkWrapper wrapper; + GdiProfiler profiler{}; + NEO::ThkWrapper wrapper(profiler, "nullptr", 0u); EXPECT_EQ(nullptr, wrapper.mFunc); }