Add WDDM implementation for detecting whether a CPU copy is required.

Change-Id: Ia8dbd38b2e701bf56148785815599e15f4711b66
Signed-off-by: Michal Mrozek <michal.mrozek@intel.com>
This commit is contained in:
Michal Mrozek
2020-03-11 11:35:26 +01:00
committed by sys_ocldev
parent d5fd28b0ca
commit 490e9e666b
3 changed files with 95 additions and 0 deletions

View File

@ -1901,3 +1901,30 @@ TEST(WddmMemoryManager, givenMultipleRootDeviceWhenCreateMemoryManagerThenTakeMa
EXPECT_EQ(4u, wddmMemoryManager.getAlignedMallocRestrictions()->minAddress);
}
// Checks that isCpuCopyRequired answers false when the EnableLocalMemory debug flag is turned off.
TEST(WddmMemoryManager, givenNoLocalMemoryOnAnyDeviceWhenIsCpuCopyRequiredIsCalledThenFalseIsReturned) {
    // Turn local memory off through the debug flag before devices are queried.
    DebugManagerStateRestore debugStateRestore;
    DebugManager.flags.EnableLocalMemory.set(false);

    // Switch off the mocked getDevices function for this test.
    VariableBackup<UltHwConfig> hwConfigBackup{&ultHwConfig};
    ultHwConfig.useMockedGetDevicesFunc = false;

    auto environment = platform()->peekExecutionEnvironment();
    size_t rootDeviceCount;
    getDevices(rootDeviceCount, *environment);

    MockWddmMemoryManager memoryManager(*environment);

    // Any valid address will do here; the restorer object on the stack is used as the pointer.
    EXPECT_FALSE(memoryManager.isCpuCopyRequired(&debugStateRestore));
}
// Checks that a pointer to a stack variable is never reported as requiring a CPU copy.
TEST(WddmMemoryManager, givenLocalPointerPassedToIsCpuCopyRequiredThenFalseIsReturned) {
    auto environment = platform()->peekExecutionEnvironment();
    size_t rootDeviceCount;

    // Switch off the mocked getDevices function for this test.
    VariableBackup<UltHwConfig> hwConfigBackup{&ultHwConfig};
    ultHwConfig.useMockedGetDevicesFunc = false;
    getDevices(rootDeviceCount, *environment);

    MockWddmMemoryManager memoryManager(*environment);

    // The stack variable doubles as the pointer under test.
    // Query several times to make sure that the result is constant.
    constexpr int queryCount = 5;
    for (int query = 0; query < queryCount; ++query) {
        EXPECT_FALSE(memoryManager.isCpuCopyRequired(&rootDeviceCount));
    }
}

View File

@ -30,6 +30,7 @@
#include "shared/source/os_interface/windows/wddm_residency_controller.h"
#include <algorithm>
#include <emmintrin.h>
namespace NEO {
@ -559,4 +560,70 @@ void WddmMemoryManager::releaseReservedCpuAddressRange(void *reserved, size_t si
getWddm(rootDeviceIndex).releaseReservedAddress(reserved);
}
// Decides whether reads through `ptr` are slow enough that the pointer should be
// treated as uncached.
//
// Returns false right away when no entry of localMemorySupported is set.
// Otherwise the function times a single read of a local stack variable and a
// single read through `ptr` (each preceded by a warm-up read) using the time
// stamp counter, subtracts the measured rdtsc-to-rdtsc overhead, and returns
// true when the read through `ptr` took more than slownessFactor times the
// fastest local read observed so far.
//
// `ptr` is dereferenced as an int, so it must point to at least sizeof(int)
// readable bytes once the local memory check has passed.
//
// NOTE(review): the two function-static trackers are plain int64_t values that
// are updated without synchronization - confirm that callers serialize access.
bool WddmMemoryManager::isCpuCopyRequired(const void *ptr) {
    // Nothing to detect when no device supports local memory.
    if (std::all_of(this->localMemorySupported.begin(), this->localMemorySupported.end(), [](bool value) { return !value; })) {
        return false;
    }

    // A read through the input pointer must be this many times slower than the
    // fastest cached read before the pointer is considered uncached.
    constexpr auto slownessFactor = 50u;

    // Smallest values seen across all calls; both only ever decrease.
    static int64_t measurementOverhead = std::numeric_limits<int64_t>::max();
    static int64_t fastestLocalRead = std::numeric_limits<int64_t>::max();

    // Local variable that is read for comparison; volatile accesses keep the
    // compiler from dropping or merging the timed loads.
    int cacheable = 1;
    volatile int *localVariablePointer = &cacheable;
    volatile const int *volatileInputPtr = static_cast<volatile const int *>(ptr);

    // Compute the timing overhead of two back-to-back time stamp reads. The
    // fences keep the time stamp reads from being reordered around the
    // instructions being measured.
    _mm_lfence();
    int64_t timestamp0 = __rdtsc();
    _mm_lfence();
    int64_t timestamp1 = __rdtsc();
    _mm_lfence();
    measurementOverhead = std::min<int64_t>(measurementOverhead, timestamp1 - timestamp0);

    // Warm-up read of the local variable.
    cacheable = *localVariablePointer;

    _mm_lfence();
    timestamp0 = __rdtsc();
    _mm_lfence();
    // Timed read of the local variable.
    cacheable = *localVariablePointer;
    _mm_lfence();
    timestamp1 = __rdtsc();
    _mm_lfence();

    // Clamp to at least 1 tick. A delta of exactly 0 must be clamped as well:
    // otherwise fastestLocalRead would become 0 for the rest of the process and
    // every input read with a positive delta would be reported as slow.
    const int64_t localVariableReadDelta = std::max<int64_t>(timestamp1 - timestamp0 - measurementOverhead, 1);
    fastestLocalRead = std::min<int64_t>(fastestLocalRead, localVariableReadDelta);

    // Warm-up read through the input pointer.
    cacheable = *volatileInputPtr;

    _mm_lfence();
    timestamp0 = __rdtsc();
    _mm_lfence();
    // Timed read through the input pointer.
    cacheable = *volatileInputPtr;
    _mm_lfence();
    timestamp1 = __rdtsc();
    _mm_lfence();

    const int64_t inputPointerReadDelta = std::max<int64_t>(timestamp1 - timestamp0 - measurementOverhead, 1);

    return inputPointerReadDelta > slownessFactor * fastestLocalRead;
}
} // namespace NEO

View File

@ -59,6 +59,7 @@ class WddmMemoryManager : public MemoryManager {
bool copyMemoryToAllocation(GraphicsAllocation *graphicsAllocation, const void *memoryToCopy, size_t sizeToCopy) override;
void *reserveCpuAddressRange(size_t size, uint32_t rootDeviceIndex) override;
void releaseReservedCpuAddressRange(void *reserved, size_t size, uint32_t rootDeviceIndex) override;
// Returns false when no device reports local memory support; otherwise times a read
// through ptr against a read of a local variable and returns true when the read
// through ptr is much slower (the threshold is set in the definition).
bool isCpuCopyRequired(const void *ptr) override;
protected:
GraphicsAllocation *createGraphicsAllocation(OsHandleStorage &handleStorage, const AllocationData &allocationData) override;