mirror of
https://github.com/intel/compute-runtime.git
synced 2025-09-10 12:53:42 +08:00
Add WDDM implementation for detecting whether a CPU copy is required.
Change-Id: Ia8dbd38b2e701bf56148785815599e15f4711b66 Signed-off-by: Michal Mrozek <michal.mrozek@intel.com>
This commit is contained in:

committed by
sys_ocldev

parent
d5fd28b0ca
commit
490e9e666b
@ -1901,3 +1901,30 @@ TEST(WddmMemoryManager, givenMultipleRootDeviceWhenCreateMemoryManagerThenTakeMa
|
||||
|
||||
EXPECT_EQ(4u, wddmMemoryManager.getAlignedMallocRestrictions()->minAddress);
|
||||
}
|
||||
|
||||
// Checks that isCpuCopyRequired() answers false when the EnableLocalMemory
// debug flag is forced off before the devices are enumerated.
TEST(WddmMemoryManager, givenNoLocalMemoryOnAnyDeviceWhenIsCpuCopyRequiredIsCalledThenFalseIsReturned) {
    // Presumably restores the debug flags when it leaves scope; it also serves
    // as the (arbitrary) host object whose address is probed below.
    DebugManagerStateRestore debugStateRestore;
    DebugManager.flags.EnableLocalMemory.set(false);

    // Switch off the mocked device query for the duration of this test.
    VariableBackup<UltHwConfig> hwConfigBackup{&ultHwConfig};
    ultHwConfig.useMockedGetDevicesFunc = false;

    auto environment = platform()->peekExecutionEnvironment();
    size_t rootDeviceCount;
    getDevices(rootDeviceCount, *environment);

    MockWddmMemoryManager memoryManager(*environment);

    const bool copyRequired = memoryManager.isCpuCopyRequired(&debugStateRestore);
    EXPECT_FALSE(copyRequired);
}
|
||||
|
||||
// Checks that a pointer to an ordinary stack variable is never reported as
// requiring a CPU copy, no matter how many times the query is repeated.
TEST(WddmMemoryManager, givenLocalPointerPassedToIsCpuCopyRequiredThenFalseIsReturned) {
    auto environment = platform()->peekExecutionEnvironment();
    size_t rootDeviceCount;

    // Switch off the mocked device query for the duration of this test.
    VariableBackup<UltHwConfig> hwConfigBackup{&ultHwConfig};
    ultHwConfig.useMockedGetDevicesFunc = false;
    getDevices(rootDeviceCount, *environment);

    MockWddmMemoryManager memoryManager(*environment);

    // Query several times to make sure that the result is constant.
    constexpr int queryCount = 5;
    for (int query = 0; query < queryCount; ++query) {
        EXPECT_FALSE(memoryManager.isCpuCopyRequired(&rootDeviceCount));
    }
}
|
||||
|
@ -30,6 +30,7 @@
|
||||
#include "shared/source/os_interface/windows/wddm_residency_controller.h"
|
||||
|
||||
#include <algorithm>
|
||||
#include <emmintrin.h>
|
||||
|
||||
namespace NEO {
|
||||
|
||||
@ -559,4 +560,70 @@ void WddmMemoryManager::releaseReservedCpuAddressRange(void *reserved, size_t si
|
||||
getWddm(rootDeviceIndex).releaseReservedAddress(reserved);
|
||||
}
|
||||
|
||||
bool WddmMemoryManager::isCpuCopyRequired(const void *ptr) {
|
||||
//check if any device support local memory
|
||||
if (std::all_of(this->localMemorySupported.begin(), this->localMemorySupported.end(), [](bool value) { return !value; })) {
|
||||
return false;
|
||||
}
|
||||
|
||||
//function checks what is the delta between reading from cachead memory
|
||||
//compare to reading from provided pointer
|
||||
//if value is above threshold, it means that pointer is uncached.
|
||||
constexpr auto slownessFactor = 50u;
|
||||
static int64_t meassurmentOverhead = std::numeric_limits<int64_t>::max();
|
||||
static int64_t fastestLocalRead = std::numeric_limits<int64_t>::max();
|
||||
|
||||
//local variable that we will read for comparison
|
||||
int cacheable = 1;
|
||||
volatile int *localVariablePointer = &cacheable;
|
||||
volatile const int *volatileInputPtr = static_cast<volatile const int *>(ptr);
|
||||
|
||||
int64_t timestamp0, timestamp1, localVariableReadDelta, inputPointerReadDelta;
|
||||
|
||||
//compute timing overhead
|
||||
_mm_lfence();
|
||||
timestamp0 = __rdtsc();
|
||||
_mm_lfence();
|
||||
timestamp1 = __rdtsc();
|
||||
_mm_lfence();
|
||||
|
||||
if (timestamp1 - timestamp0 < meassurmentOverhead) {
|
||||
meassurmentOverhead = timestamp1 - timestamp0;
|
||||
}
|
||||
|
||||
//dummy read
|
||||
cacheable = *localVariablePointer;
|
||||
|
||||
_mm_lfence();
|
||||
timestamp0 = __rdtsc();
|
||||
_mm_lfence();
|
||||
//do read
|
||||
cacheable = *localVariablePointer;
|
||||
_mm_lfence();
|
||||
timestamp1 = __rdtsc();
|
||||
_mm_lfence();
|
||||
localVariableReadDelta = timestamp1 - timestamp0 - meassurmentOverhead;
|
||||
if (localVariableReadDelta < 0) {
|
||||
localVariableReadDelta = 1;
|
||||
}
|
||||
if (localVariableReadDelta < fastestLocalRead) {
|
||||
fastestLocalRead = localVariableReadDelta;
|
||||
}
|
||||
//dummy read
|
||||
cacheable = *volatileInputPtr;
|
||||
|
||||
_mm_lfence();
|
||||
timestamp0 = __rdtsc();
|
||||
_mm_lfence();
|
||||
cacheable = *volatileInputPtr;
|
||||
_mm_lfence();
|
||||
timestamp1 = __rdtsc();
|
||||
_mm_lfence();
|
||||
inputPointerReadDelta = timestamp1 - timestamp0 - meassurmentOverhead;
|
||||
if (inputPointerReadDelta < 0) {
|
||||
inputPointerReadDelta = 1;
|
||||
}
|
||||
return inputPointerReadDelta > slownessFactor * fastestLocalRead;
|
||||
}
|
||||
|
||||
} // namespace NEO
|
||||
|
@ -59,6 +59,7 @@ class WddmMemoryManager : public MemoryManager {
|
||||
bool copyMemoryToAllocation(GraphicsAllocation *graphicsAllocation, const void *memoryToCopy, size_t sizeToCopy) override;
|
||||
void *reserveCpuAddressRange(size_t size, uint32_t rootDeviceIndex) override;
|
||||
void releaseReservedCpuAddressRange(void *reserved, size_t size, uint32_t rootDeviceIndex) override;
|
||||
// Overrides the base-class query. The WDDM definition returns false when no
// device reports local memory support; otherwise it times a read through ptr
// against a read of a local variable and returns true when the former is
// markedly slower (i.e. ptr is judged to be uncached).
bool isCpuCopyRequired(const void *ptr) override;
|
||||
|
||||
protected:
|
||||
GraphicsAllocation *createGraphicsAllocation(OsHandleStorage &handleStorage, const AllocationData &allocationData) override;
|
||||
|
Reference in New Issue
Block a user