Merge b39f1c72bb
into 3989471ffc
This commit is contained in:
commit
fc4cdfbd17
|
@ -50,6 +50,12 @@ elseif(${CMAKE_SYSTEM_PROCESSOR} STREQUAL "aarch64")
|
|||
endif()
|
||||
include_directories(${CMAKE_CURRENT_SOURCE_DIR}/third_party/sse2neon)
|
||||
set(DISABLE_WDDM_LINUX TRUE)
|
||||
elseif(${CMAKE_SYSTEM_PROCESSOR} STREQUAL "riscv64")
|
||||
set(NEO_TARGET_PROCESSOR "riscv64")
|
||||
if(NOT ${CMAKE_HOST_SYSTEM_PROCESSOR} STREQUAL ${CMAKE_SYSTEM_PROCESSOR})
|
||||
set(NEO_DISABLE_LD_LLD TRUE)
|
||||
set(NEO_DISABLE_LD_GOLD TRUE)
|
||||
endif()
|
||||
endif()
|
||||
message(STATUS "Host processor: ${CMAKE_HOST_SYSTEM_PROCESSOR}")
|
||||
message(STATUS "Target processor: ${CMAKE_SYSTEM_PROCESSOR}")
|
||||
|
@ -788,31 +794,33 @@ if(NOT NEO_DISABLE_MITIGATIONS)
|
|||
message(WARNING "Spectre mitigation is not supported by the compiler")
|
||||
endif()
|
||||
else()
|
||||
if(${CMAKE_CXX_COMPILER_ID} STREQUAL "Clang")
|
||||
check_cxx_compiler_flag(-mretpoline COMPILER_SUPPORTS_RETPOLINE)
|
||||
if(COMPILER_SUPPORTS_RETPOLINE)
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mretpoline")
|
||||
if(NOT ${CMAKE_SYSTEM_PROCESSOR} STREQUAL "riscv64")
|
||||
if(${CMAKE_CXX_COMPILER_ID} STREQUAL "Clang")
|
||||
check_cxx_compiler_flag(-mretpoline COMPILER_SUPPORTS_RETPOLINE)
|
||||
if(COMPILER_SUPPORTS_RETPOLINE)
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mretpoline")
|
||||
else()
|
||||
message(WARNING "Spectre mitigation -mretpoline flag is not supported by the compiler")
|
||||
endif()
|
||||
else()
|
||||
message(WARNING "Spectre mitigation -mretpoline flag is not supported by the compiler")
|
||||
endif()
|
||||
else()
|
||||
check_cxx_compiler_flag(-mindirect-branch=thunk COMPILER_SUPPORTS_INDIRECT_BRANCH_THUNK)
|
||||
if(COMPILER_SUPPORTS_INDIRECT_BRANCH_THUNK)
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mindirect-branch=thunk")
|
||||
else()
|
||||
message(WARNING "Spectre mitigation -mindirect-branch=thunk flag is not supported by the compiler")
|
||||
endif()
|
||||
check_cxx_compiler_flag(-mfunction-return=thunk COMPILER_SUPPORTS_FUNCTION_RETURN_THUNK)
|
||||
if(COMPILER_SUPPORTS_FUNCTION_RETURN_THUNK)
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mfunction-return=thunk")
|
||||
else()
|
||||
message(WARNING "Spectre mitigation -mfunction-return=thunk flag is not supported by the compiler")
|
||||
endif()
|
||||
check_cxx_compiler_flag(-mindirect-branch-register COMPILER_SUPPORTS_INDIRECT_BRANCH_REGISTER)
|
||||
if(COMPILER_SUPPORTS_INDIRECT_BRANCH_REGISTER)
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mindirect-branch-register")
|
||||
else()
|
||||
message(WARNING "Spectre mitigation -mindirect-branch-register flag is not supported by the compiler")
|
||||
check_cxx_compiler_flag(-mindirect-branch=thunk COMPILER_SUPPORTS_INDIRECT_BRANCH_THUNK)
|
||||
if(COMPILER_SUPPORTS_INDIRECT_BRANCH_THUNK)
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mindirect-branch=thunk")
|
||||
else()
|
||||
message(WARNING "Spectre mitigation -mindirect-branch=thunk flag is not supported by the compiler")
|
||||
endif()
|
||||
check_cxx_compiler_flag(-mfunction-return=thunk COMPILER_SUPPORTS_FUNCTION_RETURN_THUNK)
|
||||
if(COMPILER_SUPPORTS_FUNCTION_RETURN_THUNK)
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mfunction-return=thunk")
|
||||
else()
|
||||
message(WARNING "Spectre mitigation -mfunction-return=thunk flag is not supported by the compiler")
|
||||
endif()
|
||||
check_cxx_compiler_flag(-mindirect-branch-register COMPILER_SUPPORTS_INDIRECT_BRANCH_REGISTER)
|
||||
if(COMPILER_SUPPORTS_INDIRECT_BRANCH_REGISTER)
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mindirect-branch-register")
|
||||
else()
|
||||
message(WARNING "Spectre mitigation -mindirect-branch-register flag is not supported by the compiler")
|
||||
endif()
|
||||
endif()
|
||||
endif()
|
||||
endif()
|
||||
|
@ -820,10 +828,12 @@ else()
|
|||
message(WARNING "Spectre mitigation DISABLED")
|
||||
endif()
|
||||
|
||||
if(NOT MSVC)
|
||||
check_cxx_compiler_flag(-msse4.2 COMPILER_SUPPORTS_SSE42)
|
||||
check_cxx_compiler_flag(-mavx2 COMPILER_SUPPORTS_AVX2)
|
||||
check_cxx_compiler_flag(-march=armv8-a+simd COMPILER_SUPPORTS_NEON)
|
||||
if(NOT ${CMAKE_SYSTEM_PROCESSOR} STREQUAL "riscv64")
|
||||
if(NOT MSVC)
|
||||
check_cxx_compiler_flag(-msse4.2 COMPILER_SUPPORTS_SSE42)
|
||||
check_cxx_compiler_flag(-mavx2 COMPILER_SUPPORTS_AVX2)
|
||||
check_cxx_compiler_flag(-march=armv8-a+simd COMPILER_SUPPORTS_NEON)
|
||||
endif()
|
||||
endif()
|
||||
|
||||
if(NOT MSVC)
|
||||
|
@ -831,22 +841,24 @@ if(NOT MSVC)
|
|||
endif()
|
||||
|
||||
# intrinsics (_mm_clflushopt and waitpkg) support
|
||||
if(NOT MSVC)
|
||||
check_cxx_compiler_flag(-mclflushopt SUPPORTS_CLFLUSHOPT)
|
||||
check_cxx_compiler_flag(-mwaitpkg SUPPORTS_WAITPKG)
|
||||
if(SUPPORTS_CLFLUSHOPT)
|
||||
add_compile_definitions(SUPPORTS_CLFLUSHOPT)
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mclflushopt")
|
||||
endif()
|
||||
if(SUPPORTS_WAITPKG)
|
||||
add_compile_definitions(SUPPORTS_WAITPKG)
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mwaitpkg")
|
||||
if(NOT ${CMAKE_SYSTEM_PROCESSOR} STREQUAL "riscv64")
|
||||
if(NOT MSVC)
|
||||
check_cxx_compiler_flag(-mclflushopt SUPPORTS_CLFLUSHOPT)
|
||||
check_cxx_compiler_flag(-mwaitpkg SUPPORTS_WAITPKG)
|
||||
if(SUPPORTS_CLFLUSHOPT)
|
||||
add_compile_definitions(SUPPORTS_CLFLUSHOPT)
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mclflushopt")
|
||||
endif()
|
||||
if(SUPPORTS_WAITPKG)
|
||||
add_compile_definitions(SUPPORTS_WAITPKG)
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mwaitpkg")
|
||||
else()
|
||||
message(WARNING "-mwaitpkg flag is not supported by the compiler")
|
||||
endif()
|
||||
else()
|
||||
message(WARNING "-mwaitpkg flag is not supported by the compiler")
|
||||
add_compile_definitions(SUPPORTS_CLFLUSHOPT)
|
||||
add_compile_definitions(SUPPORTS_WAITPKG)
|
||||
endif()
|
||||
else()
|
||||
add_compile_definitions(SUPPORTS_CLFLUSHOPT)
|
||||
add_compile_definitions(SUPPORTS_WAITPKG)
|
||||
endif()
|
||||
|
||||
# Compiler warning flags
|
||||
|
|
|
@ -162,6 +162,10 @@ EOF
|
|||
NEO_SKIP_UNIT_TESTS="TRUE"
|
||||
export NEO_DISABLE_BUILTINS_COMPILATION="TRUE"
|
||||
fi
|
||||
if [ "${TARGET_ARCH}" == "riscv64" ]; then
|
||||
NEO_SKIP_UNIT_TESTS="TRUE"
|
||||
export NEO_DISABLE_BUILTINS_COMPILATION="TRUE"
|
||||
fi
|
||||
export NEO_DISABLE_BUILTINS_COMPILATION
|
||||
export NEO_SKIP_UNIT_TESTS
|
||||
|
||||
|
|
|
@ -130,6 +130,10 @@ EOF
|
|||
NEO_SKIP_UNIT_TESTS="TRUE"
|
||||
export NEO_DISABLE_BUILTINS_COMPILATION="TRUE"
|
||||
fi
|
||||
if [ "${TARGET_ARCH}" == "riscv64" ]; then
|
||||
NEO_SKIP_UNIT_TESTS="TRUE"
|
||||
export NEO_DISABLE_BUILTINS_COMPILATION="TRUE"
|
||||
fi
|
||||
export NEO_DISABLE_BUILTINS_COMPILATION
|
||||
export NEO_SKIP_UNIT_TESTS
|
||||
|
||||
|
|
|
@ -129,6 +129,10 @@ EOF
|
|||
NEO_SKIP_UNIT_TESTS="TRUE"
|
||||
export NEO_DISABLE_BUILTINS_COMPILATION="TRUE"
|
||||
fi
|
||||
if [ "${TARGET_ARCH}" == "riscv64" ]; then
|
||||
NEO_SKIP_UNIT_TESTS="TRUE"
|
||||
export NEO_DISABLE_BUILTINS_COMPILATION="TRUE"
|
||||
fi
|
||||
export NEO_DISABLE_BUILTINS_COMPILATION
|
||||
export NEO_SKIP_UNIT_TESTS
|
||||
|
||||
|
|
|
@ -47,9 +47,11 @@ inline uint32_t getPerThreadSizeLocalIDs(uint32_t simd, uint32_t grfSize, uint32
|
|||
}
|
||||
|
||||
struct LocalIDHelper {
|
||||
#if !defined(__riscv)
|
||||
static void (*generateSimd8)(void *buffer, const std::array<uint16_t, 3> &localWorkgroupSize, uint16_t threadsPerWorkGroup, const std::array<uint8_t, 3> &dimensionsOrder, bool chooseMaxRowSize);
|
||||
static void (*generateSimd16)(void *buffer, const std::array<uint16_t, 3> &localWorkgroupSize, uint16_t threadsPerWorkGroup, const std::array<uint8_t, 3> &dimensionsOrder, bool chooseMaxRowSize);
|
||||
static void (*generateSimd32)(void *buffer, const std::array<uint16_t, 3> &localWorkgroupSize, uint16_t threadsPerWorkGroup, const std::array<uint8_t, 3> &dimensionsOrder, bool chooseMaxRowSize);
|
||||
#endif
|
||||
|
||||
static LocalIDHelper initializer;
|
||||
|
||||
|
|
|
@ -0,0 +1,14 @@
|
|||
#
|
||||
# Copyright (C) 2019-2022 Intel Corporation
|
||||
#
|
||||
# SPDX-License-Identifier: MIT
|
||||
#
|
||||
|
||||
if(${NEO_TARGET_PROCESSOR} STREQUAL "riscv64")
|
||||
list(APPEND NEO_CORE_HELPERS
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/local_id_gen.cpp
|
||||
)
|
||||
|
||||
set_property(GLOBAL PROPERTY NEO_CORE_HELPERS ${NEO_CORE_HELPERS})
|
||||
endif()
|
|
@ -0,0 +1,39 @@
|
|||
/*
|
||||
* Copyright (C) 2018-2024 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#include "shared/source/helpers/local_id_gen.h"
|
||||
|
||||
#include "shared/source/execution_environment/root_device_environment.h"
|
||||
#include "shared/source/helpers/aligned_memory.h"
|
||||
#include "shared/source/helpers/gfx_core_helper.h"
|
||||
#include "shared/source/helpers/local_id_gen_special.inl"
|
||||
#include "shared/source/utilities/cpu_info.h"
|
||||
|
||||
namespace NEO {
|
||||
|
||||
struct uint16x8_t;
|
||||
struct uint16x16_t;
|
||||
|
||||
// This is the initial value of SIMD for local ID
|
||||
// computation. It correlates to the SIMD lane.
|
||||
// Must be 32byte aligned for AVX2 usage
|
||||
ALIGNAS(32)
|
||||
const uint16_t initialLocalID[] = {
|
||||
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
|
||||
16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31};
|
||||
|
||||
// Initialize the lookup table based on CPU capabilities
|
||||
LocalIDHelper::LocalIDHelper() {
|
||||
}
|
||||
|
||||
LocalIDHelper LocalIDHelper::initializer;
|
||||
|
||||
void generateLocalIDs(void *buffer, uint16_t simd, const std::array<uint16_t, 3> &localWorkgroupSize, const std::array<uint8_t, 3> &dimensionsOrder, bool isImageOnlyKernel, uint32_t grfSize, uint32_t grfCount, const RootDeviceEnvironment &rootDeviceEnvironment) {
|
||||
generateLocalIDsForSimdOne(buffer, localWorkgroupSize, dimensionsOrder, grfSize);
|
||||
}
|
||||
|
||||
} // namespace NEO
|
|
@ -12,8 +12,11 @@
|
|||
#include <cstdint>
|
||||
#if defined(__ARM_ARCH)
|
||||
#include <sse2neon.h>
|
||||
#else
|
||||
#elif defined(__x86_64__) || defined(_M_X64)
|
||||
#include <immintrin.h>
|
||||
#elif defined(__riscv)
|
||||
#include <cstring>
|
||||
typedef std::uint16_t __attribute__((vector_size(8))) __m128i;
|
||||
#endif
|
||||
|
||||
namespace NEO {
|
||||
|
@ -24,14 +27,28 @@ struct uint16x8_t { // NOLINT(readability-identifier-naming)
|
|||
__m128i value;
|
||||
|
||||
uint16x8_t() {
|
||||
#if defined(__riscv)
|
||||
std::memset(&value, 0, sizeof(std::uint16_t)*8);
|
||||
#else
|
||||
value = _mm_setzero_si128();
|
||||
#endif
|
||||
}
|
||||
|
||||
#if defined(__riscv)
|
||||
uint16x8_t(__m128i val) {
|
||||
std::memcpy(&value, &val, sizeof(std::uint16_t)*8);
|
||||
}
|
||||
#else
|
||||
uint16x8_t(__m128i value) : value(value) {
|
||||
}
|
||||
#endif
|
||||
|
||||
uint16x8_t(uint16_t a) {
|
||||
#if defined(__riscv)
|
||||
std::memset(&value, a, sizeof(std::uint16_t)*8);
|
||||
#else
|
||||
value = _mm_set1_epi16(a); // SSE2
|
||||
#endif
|
||||
}
|
||||
|
||||
explicit uint16x8_t(const void *alignedPtr) {
|
||||
|
@ -57,57 +74,117 @@ struct uint16x8_t { // NOLINT(readability-identifier-naming)
|
|||
|
||||
inline void load(const void *alignedPtr) {
|
||||
DEBUG_BREAK_IF(!isAligned<16>(alignedPtr));
|
||||
#if defined(__riscv)
|
||||
std::memcpy(&value, reinterpret_cast<const __m128i *>(alignedPtr), sizeof(std::uint16_t)*8);
|
||||
#else
|
||||
value = _mm_load_si128(reinterpret_cast<const __m128i *>(alignedPtr)); // SSE2
|
||||
#endif
|
||||
}
|
||||
|
||||
inline void loadUnaligned(const void *ptr) {
|
||||
#if defined(__riscv)
|
||||
std::memcpy(&value, reinterpret_cast<const __m128i *>(ptr), sizeof(std::uint16_t)*8);
|
||||
#else
|
||||
value = _mm_loadu_si128(reinterpret_cast<const __m128i *>(ptr)); // SSE2
|
||||
#endif
|
||||
}
|
||||
|
||||
inline void store(void *alignedPtr) {
|
||||
DEBUG_BREAK_IF(!isAligned<16>(alignedPtr));
|
||||
#if defined(__riscv)
|
||||
std::memcpy(alignedPtr, &value, sizeof(std::uint16_t)*8);
|
||||
#else
|
||||
_mm_store_si128(reinterpret_cast<__m128i *>(alignedPtr), value); // SSE2
|
||||
#endif
|
||||
}
|
||||
|
||||
inline void storeUnaligned(void *ptr) {
|
||||
#if defined(__riscv)
|
||||
std::memcpy(ptr, &value, sizeof(std::uint16_t)*8);
|
||||
#else
|
||||
_mm_storeu_si128(reinterpret_cast<__m128i *>(ptr), value); // SSE2
|
||||
#endif
|
||||
}
|
||||
|
||||
inline operator bool() const {
|
||||
#if defined(__riscv)
|
||||
unsigned int result = 0;
|
||||
uint16x8_t mask_value = mask();
|
||||
for(unsigned int i = 0; i < 8; ++i) {
|
||||
result += value[i] & mask_value.value[i];
|
||||
}
|
||||
return (result < 1);
|
||||
#else
|
||||
return _mm_test_all_zeros(value, mask().value) ? false : true; // SSE4.1 alternatives?
|
||||
#endif
|
||||
}
|
||||
|
||||
inline uint16x8_t &operator-=(const uint16x8_t &a) {
|
||||
#if defined(__riscv)
|
||||
for(unsigned int i = 0; i < 8; ++i) {
|
||||
value[i] = value[i] - a.value[i];
|
||||
}
|
||||
#else
|
||||
value = _mm_sub_epi16(value, a.value); // SSE2
|
||||
#endif
|
||||
return *this;
|
||||
}
|
||||
|
||||
inline uint16x8_t &operator+=(const uint16x8_t &a) {
|
||||
#if defined(__riscv)
|
||||
for(unsigned int i = 0; i < 8; ++i) {
|
||||
value[i] = value[i] + a.value[i];
|
||||
}
|
||||
#else
|
||||
value = _mm_add_epi16(value, a.value); // SSE2
|
||||
#endif
|
||||
return *this;
|
||||
}
|
||||
|
||||
inline friend uint16x8_t operator>=(const uint16x8_t &a, const uint16x8_t &b) {
|
||||
uint16x8_t result;
|
||||
#if defined(__riscv)
|
||||
std::uint16_t mask = 0;
|
||||
for(unsigned int i = 0; i < 8; ++i) {
|
||||
mask = ( a.value[i] < b.value[i] ) ? 1 : 0;
|
||||
result.value[i] = result.value[i] ^ mask;
|
||||
}
|
||||
#else
|
||||
result.value =
|
||||
_mm_xor_si128(mask().value,
|
||||
_mm_cmplt_epi16(a.value, b.value)); // SSE2
|
||||
#endif
|
||||
return result;
|
||||
}
|
||||
|
||||
inline friend uint16x8_t operator&&(const uint16x8_t &a, const uint16x8_t &b) {
|
||||
uint16x8_t result;
|
||||
#if defined(__riscv)
|
||||
for(unsigned int i = 0; i < 8; ++i) {
|
||||
result.value[i] = a.value[i] & b.value[i];
|
||||
}
|
||||
#else
|
||||
result.value = _mm_and_si128(a.value, b.value); // SSE2
|
||||
#endif
|
||||
return result;
|
||||
}
|
||||
|
||||
// NOTE: uint16x8_t::blend behaves like mask ? a : b
|
||||
inline friend uint16x8_t blend(const uint16x8_t &a, const uint16x8_t &b, const uint16x8_t &mask) {
|
||||
uint16x8_t result;
|
||||
#if defined(__riscv)
|
||||
for(unsigned int i = 0; i < 8; ++i) {
|
||||
std::uint8_t mask_values[2] = { static_cast<uint8_t>((mask.value[i] << 8) >> 8), static_cast<uint8_t>(mask.value[i] >> 8) };
|
||||
std::uint8_t a_values[2] = { static_cast<uint8_t>((a.value[i] << 8) >> 8), static_cast<uint8_t>(a.value[i] >> 8) };
|
||||
std::uint8_t b_values[2] = { static_cast<uint8_t>((b.value[i] << 8) >> 8), static_cast<uint8_t>(b.value[i] >> 8) };
|
||||
|
||||
result.value[i] = (( mask_values[1] ? a_values[1] : b_values[0] ) << 8 ) | ( mask_values[0] ? a_values[0] : b_values[0] );
|
||||
}
|
||||
#else
|
||||
// Have to swap arguments to get intended calling semantics
|
||||
result.value =
|
||||
_mm_blendv_epi8(b.value, a.value, mask.value); // SSE4.1 alternatives?
|
||||
#endif
|
||||
return result;
|
||||
}
|
||||
};
|
||||
|
|
|
@ -44,7 +44,9 @@
|
|||
#include "shared/source/utilities/logger_neo_only.h"
|
||||
|
||||
#include <algorithm>
|
||||
#if !defined(__riscv)
|
||||
#include <emmintrin.h>
|
||||
#endif
|
||||
|
||||
namespace NEO {
|
||||
|
||||
|
@ -1172,11 +1174,25 @@ bool WddmMemoryManager::isCpuCopyRequired(const void *ptr) {
|
|||
int64_t timestamp0, timestamp1, localVariableReadDelta, inputPointerReadDelta;
|
||||
|
||||
// compute timing overhead
|
||||
#if defined(__riscv)
|
||||
__asm__ __volatile__ ("fence ir, ir" : : : "memory");
|
||||
__asm__ __volatile__(
|
||||
"rdtime %0;\n"
|
||||
: "=r"(timestamp0)
|
||||
:: );
|
||||
__asm__ __volatile__ ("fence ir, ir" : : : "memory");
|
||||
__asm__ __volatile__(
|
||||
"rdtime %0;\n"
|
||||
: "=r"(timestamp1)
|
||||
:: );
|
||||
__asm__ __volatile__ ("fence ir, ir" : : : "memory");
|
||||
#else
|
||||
_mm_lfence();
|
||||
timestamp0 = __rdtsc();
|
||||
_mm_lfence();
|
||||
timestamp1 = __rdtsc();
|
||||
_mm_lfence();
|
||||
#endif
|
||||
|
||||
if (timestamp1 - timestamp0 < meassurmentOverhead) {
|
||||
meassurmentOverhead = timestamp1 - timestamp0;
|
||||
|
@ -1185,14 +1201,32 @@ bool WddmMemoryManager::isCpuCopyRequired(const void *ptr) {
|
|||
// dummy read
|
||||
cacheable = *localVariablePointer;
|
||||
|
||||
#if defined(__riscv)
|
||||
__asm__ __volatile__ ("fence ir, ir" : : : "memory");
|
||||
__asm__ __volatile__(
|
||||
"rdtime %0;\n"
|
||||
: "=r"(timestamp0)
|
||||
:: );
|
||||
__asm__ __volatile__ ("fence ir, ir" : : : "memory");
|
||||
#else
|
||||
_mm_lfence();
|
||||
timestamp0 = __rdtsc();
|
||||
_mm_lfence();
|
||||
#endif
|
||||
// do read
|
||||
cacheable = *localVariablePointer;
|
||||
#if defined(__riscv)
|
||||
__asm__ __volatile__ ("fence ir, ir" : : : "memory");
|
||||
__asm__ __volatile__(
|
||||
"rdtime %0;\n"
|
||||
: "=r"(timestamp1)
|
||||
:: );
|
||||
__asm__ __volatile__ ("fence ir, ir" : : : "memory");
|
||||
#else
|
||||
_mm_lfence();
|
||||
timestamp1 = __rdtsc();
|
||||
_mm_lfence();
|
||||
#endif
|
||||
localVariableReadDelta = timestamp1 - timestamp0 - meassurmentOverhead;
|
||||
if (localVariableReadDelta <= 0) {
|
||||
localVariableReadDelta = 1;
|
||||
|
@ -1203,13 +1237,31 @@ bool WddmMemoryManager::isCpuCopyRequired(const void *ptr) {
|
|||
// dummy read
|
||||
cacheable = *volatileInputPtr;
|
||||
|
||||
#if defined(__riscv)
|
||||
__asm__ __volatile__ ("fence ir, ir" : : : "memory");
|
||||
__asm__ __volatile__(
|
||||
"rdtime %0;\n"
|
||||
: "=r"(timestamp0)
|
||||
:: );
|
||||
__asm__ __volatile__ ("fence ir, ir" : : : "memory");
|
||||
#else
|
||||
_mm_lfence();
|
||||
timestamp0 = __rdtsc();
|
||||
_mm_lfence();
|
||||
#endif
|
||||
cacheable = *volatileInputPtr;
|
||||
#if defined(__riscv)
|
||||
__asm__ __volatile__ ("fence ir, ir" : : : "memory");
|
||||
__asm__ __volatile__(
|
||||
"rdtime %0;\n"
|
||||
: "=r"(timestamp1)
|
||||
:: );
|
||||
__asm__ __volatile__ ("fence ir, ir" : : : "memory");
|
||||
#else
|
||||
_mm_lfence();
|
||||
timestamp1 = __rdtsc();
|
||||
_mm_lfence();
|
||||
#endif
|
||||
inputPointerReadDelta = timestamp1 - timestamp0 - meassurmentOverhead;
|
||||
if (inputPointerReadDelta <= 0) {
|
||||
inputPointerReadDelta = 1;
|
||||
|
|
|
@ -49,7 +49,9 @@
|
|||
#if __clang__
|
||||
#pragma clang diagnostic pop
|
||||
#endif
|
||||
#if !defined(__riscv)
|
||||
#include <x86intrin.h>
|
||||
#endif
|
||||
|
||||
#define STATUS_GRAPHICS_NO_VIDEO_MEMORY ((NTSTATUS)0xC01E0100L)
|
||||
|
||||
|
|
|
@ -11,6 +11,7 @@
|
|||
#include <immintrin.h>
|
||||
#include <intrin.h>
|
||||
#pragma intrinsic(__rdtsc)
|
||||
#elif defined(__riscv)
|
||||
#else
|
||||
#if defined(__ARM_ARCH)
|
||||
extern "C" uint64_t __rdtsc();
|
||||
|
@ -22,6 +23,7 @@ extern "C" uint64_t __rdtsc();
|
|||
|
||||
#if defined(__ARM_ARCH)
|
||||
#include <sse2neon.h>
|
||||
#elif defined(__riscv)
|
||||
#else
|
||||
#include <emmintrin.h>
|
||||
#endif
|
||||
|
@ -30,19 +32,38 @@ namespace NEO {
|
|||
namespace CpuIntrinsics {
|
||||
|
||||
void clFlush(void const *ptr) {
|
||||
#if defined(__riscv)
|
||||
return;
|
||||
#else
|
||||
_mm_clflush(ptr);
|
||||
#endif
|
||||
}
|
||||
|
||||
void clFlushOpt(void *ptr) {
|
||||
#ifdef SUPPORTS_CLFLUSHOPT
|
||||
_mm_clflushopt(ptr);
|
||||
#elif defined(__riscv)
|
||||
return;
|
||||
#else
|
||||
_mm_clflush(ptr);
|
||||
#endif
|
||||
}
|
||||
|
||||
void sfence() {
|
||||
#if defined(__riscv)
|
||||
// According to:
|
||||
//
|
||||
// https://blog.jiejiss.com/Rust-is-incompatible-with-LLVM-at-least-partially/
|
||||
// https://github.com/riscv/riscv-isa-manual/issues/43
|
||||
// https://stackoverflow.com/questions/68537854/pause-instruction-unrecognized-opcode-pause-in-risc-v
|
||||
//
|
||||
// gcc/clang assembler will not currently (2022) accept `fence w,unknown`;
|
||||
// `hand-rolling` the instruction (see below) does the job.
|
||||
//
|
||||
__asm__ __volatile__(".insn i 0x0F, 0, x0, x0, 0x010");
|
||||
#else
|
||||
_mm_sfence();
|
||||
#endif
|
||||
}
|
||||
|
||||
void mfence() {
|
||||
|
@ -50,7 +71,11 @@ void mfence() {
|
|||
}
|
||||
|
||||
void pause() {
|
||||
#if defined(__riscv)
|
||||
__asm__ volatile("fence"::);
|
||||
#else
|
||||
_mm_pause();
|
||||
#endif
|
||||
}
|
||||
|
||||
uint8_t tpause(uint32_t control, uint64_t counter) {
|
||||
|
@ -76,7 +101,16 @@ void umonitor(void *a) {
|
|||
}
|
||||
|
||||
uint64_t rdtsc() {
|
||||
#if defined(__riscv)
|
||||
std::uint64_t val = 0;
|
||||
__asm__ __volatile__(
|
||||
"rdtime %0;\n"
|
||||
: "=r"(val)
|
||||
:: );
|
||||
return val;
|
||||
#else
|
||||
return __rdtsc();
|
||||
#endif
|
||||
}
|
||||
|
||||
} // namespace CpuIntrinsics
|
||||
|
|
|
@ -0,0 +1,13 @@
|
|||
#
|
||||
# Copyright (C) 2021 Intel Corporation
|
||||
#
|
||||
# SPDX-License-Identifier: MIT
|
||||
#
|
||||
|
||||
if(${NEO_TARGET_PROCESSOR} STREQUAL "riscv64")
|
||||
list(APPEND NEO_CORE_UTILITIES_LINUX
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/cpu_info.cpp
|
||||
)
|
||||
|
||||
set_property(GLOBAL PROPERTY NEO_CORE_UTILITIES_LINUX ${NEO_CORE_UTILITIES_LINUX})
|
||||
endif()
|
|
@ -0,0 +1,55 @@
|
|||
/*
|
||||
* Copyright (C) 2019-2022 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#include "shared/source/utilities/cpu_info.h"
|
||||
|
||||
#include "shared/source/os_interface/linux/os_inc.h"
|
||||
|
||||
#include <cstdint>
|
||||
#include <fstream>
|
||||
#include <sys/auxv.h>
|
||||
|
||||
namespace NEO {
|
||||
|
||||
void cpuidLinuxWrapper(int cpuInfo[4], int functionId) {
|
||||
cpuInfo[0] = static_cast<int>(getauxval(AT_HWCAP));
|
||||
}
|
||||
|
||||
void cpuidexLinuxWrapper(int *cpuInfo, int functionId, int subfunctionId) {
|
||||
}
|
||||
|
||||
void getCpuFlagsLinux(std::string &cpuFlags) {
|
||||
std::ifstream cpuinfo(std::string(Os::sysFsProcPathPrefix) + "/cpuinfo");
|
||||
std::string line;
|
||||
while (std::getline(cpuinfo, line)) {
|
||||
if (line.substr(0, 8) == "Features") {
|
||||
cpuFlags = line;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void (*CpuInfo::cpuidexFunc)(int *, int, int) = cpuidexLinuxWrapper;
|
||||
void (*CpuInfo::cpuidFunc)(int[4], int) = cpuidLinuxWrapper;
|
||||
void (*CpuInfo::getCpuFlagsFunc)(std::string &) = getCpuFlagsLinux;
|
||||
|
||||
const CpuInfo CpuInfo::instance;
|
||||
|
||||
void CpuInfo::cpuid(
|
||||
uint32_t cpuInfo[4],
|
||||
uint32_t functionId) const {
|
||||
cpuidFunc(reinterpret_cast<int *>(cpuInfo), functionId);
|
||||
}
|
||||
|
||||
void CpuInfo::cpuidex(
|
||||
uint32_t cpuInfo[4],
|
||||
uint32_t functionId,
|
||||
uint32_t subfunctionId) const {
|
||||
cpuidexFunc(reinterpret_cast<int *>(cpuInfo), functionId, subfunctionId);
|
||||
}
|
||||
|
||||
} // namespace NEO
|
|
@ -0,0 +1,12 @@
|
|||
#
|
||||
# Copyright (C) 2021 Intel Corporation
|
||||
#
|
||||
# SPDX-License-Identifier: MIT
|
||||
#
|
||||
|
||||
if(${NEO_TARGET_PROCESSOR} STREQUAL "riscv64")
|
||||
set_property(GLOBAL APPEND PROPERTY NEO_CORE_UTILITIES
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/cpu_info_riscv64.cpp
|
||||
)
|
||||
endif()
|
|
@ -0,0 +1,18 @@
|
|||
/*
|
||||
* Copyright (C) 2021-2022 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#include "shared/source/utilities/cpu_info.h"
|
||||
|
||||
#include <asm/hwcap.h>
|
||||
|
||||
namespace NEO {
|
||||
void CpuInfo::detect() const {
|
||||
uint32_t cpuInfo[4] = {};
|
||||
cpuid(cpuInfo, 0u);
|
||||
features |= featureNone;
|
||||
}
|
||||
} // namespace NEO
|
Loading…
Reference in New Issue