mirror of https://github.com/intel/gmmlib.git
Merge 97748870c4
into 20050cbe25
This commit is contained in:
commit
c9d50346e5
|
@ -167,6 +167,8 @@ endif()
|
||||||
|
|
||||||
# Choose the value passed to -march= through GMMLIB_MARCH.
#  * AArch64 targets always get the ARMv8-A baseline with FP and SIMD.
#  * RISC-V targets always get the rv64g baseline.
#  * Any other target falls back to "corei7" only when GMMLIB_MARCH has not
#    already been given a non-empty value.
# CMAKE_SYSTEM_PROCESSOR is expanded inside quotes so that an empty value
# yields a valid (false) condition instead of a malformed if() expression,
# and so that its value is never re-interpreted as a variable name.
if ("${CMAKE_SYSTEM_PROCESSOR}" MATCHES "^aarch")
    set(GMMLIB_MARCH "armv8-a+fp+simd")
elseif ("${CMAKE_SYSTEM_PROCESSOR}" MATCHES "riscv")
    set(GMMLIB_MARCH "rv64g")
elseif("${GMMLIB_MARCH}" STREQUAL "")
    set(GMMLIB_MARCH "corei7")
endif()
|
||||||
|
@ -302,6 +304,7 @@ set(SOURCES_
|
||||||
${BS_DIR_GMMLIB}/Texture/GmmTextureSpecialCases.cpp
|
${BS_DIR_GMMLIB}/Texture/GmmTextureSpecialCases.cpp
|
||||||
${BS_DIR_GMMLIB}/Texture/GmmTextureOffset.cpp
|
${BS_DIR_GMMLIB}/Texture/GmmTextureOffset.cpp
|
||||||
${BS_DIR_GMMLIB}/GlobalInfo/GmmInfo.cpp
|
${BS_DIR_GMMLIB}/GlobalInfo/GmmInfo.cpp
|
||||||
|
${BS_DIR_GMMLIB}/Utility/CpuSwizzleBlt/riscv_sse2_support.h
|
||||||
${BS_DIR_GMMLIB}/Utility/CpuSwizzleBlt/CpuSwizzleBlt.c
|
${BS_DIR_GMMLIB}/Utility/CpuSwizzleBlt/CpuSwizzleBlt.c
|
||||||
${BS_DIR_GMMLIB}/Utility/GmmLog/GmmLog.cpp
|
${BS_DIR_GMMLIB}/Utility/GmmLog/GmmLog.cpp
|
||||||
${BS_DIR_GMMLIB}/Utility/GmmUtility.cpp
|
${BS_DIR_GMMLIB}/Utility/GmmUtility.cpp
|
||||||
|
@ -579,6 +582,9 @@ if(UNIX)
|
||||||
FILES_MATCHING PATTERN "*.h"
|
FILES_MATCHING PATTERN "*.h"
|
||||||
PATTERN "*.hpp")
|
PATTERN "*.hpp")
|
||||||
|
|
||||||
|
install (FILES ${BS_DIR_GMMLIB}/Utility/CpuSwizzleBlt/riscv_sse2_support.h
|
||||||
|
DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/igdgmm/GmmLib/Utility/CpuSwizzleBlt/ COMPONENT gmmlib-devel)
|
||||||
|
|
||||||
install (FILES ${BS_DIR_GMMLIB}/Utility/CpuSwizzleBlt/CpuSwizzleBlt.c
|
install (FILES ${BS_DIR_GMMLIB}/Utility/CpuSwizzleBlt/CpuSwizzleBlt.c
|
||||||
DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/igdgmm/GmmLib/Utility/CpuSwizzleBlt/ COMPONENT gmmlib-devel)
|
DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/igdgmm/GmmLib/Utility/CpuSwizzleBlt/ COMPONENT gmmlib-devel)
|
||||||
|
|
||||||
|
|
|
@ -55,6 +55,40 @@ if (${CMAKE_SYSTEM_PROCESSOR} MATCHES "^aarch")
|
||||||
-fPIC
|
-fPIC
|
||||||
-g
|
-g
|
||||||
)
|
)
|
||||||
|
elseif (${CMAKE_SYSTEM_PROCESSOR} MATCHES "riscv")
|
||||||
|
SET (GMMLIB_COMPILER_FLAGS_COMMON
|
||||||
|
#general warnings
|
||||||
|
#-Wall
|
||||||
|
-Winit-self
|
||||||
|
-Winvalid-pch
|
||||||
|
-Wpointer-arith
|
||||||
|
-Wno-unused
|
||||||
|
-Wno-unknown-pragmas
|
||||||
|
-Wno-comments
|
||||||
|
-Wno-narrowing
|
||||||
|
-Wno-overflow
|
||||||
|
-Wno-parentheses
|
||||||
|
-Wno-missing-braces
|
||||||
|
-Wno-sign-compare
|
||||||
|
-Werror=address
|
||||||
|
-Werror=format-security
|
||||||
|
-Werror=return-type
|
||||||
|
|
||||||
|
# General optimization options
|
||||||
|
-march=${GMMLIB_MARCH}
|
||||||
|
-finline-functions
|
||||||
|
-fno-short-enums
|
||||||
|
-Wa,--noexecstack
|
||||||
|
-fno-strict-aliasing
|
||||||
|
# Other common flags
|
||||||
|
-fstack-protector
|
||||||
|
-fdata-sections
|
||||||
|
-ffunction-sections
|
||||||
|
-fmessage-length=0
|
||||||
|
-fvisibility=hidden
|
||||||
|
-fPIC
|
||||||
|
-g
|
||||||
|
)
|
||||||
else()
|
else()
|
||||||
SET (GMMLIB_COMPILER_FLAGS_COMMON
|
SET (GMMLIB_COMPILER_FLAGS_COMMON
|
||||||
#general warnings
|
#general warnings
|
||||||
|
|
|
@ -375,6 +375,8 @@ extern void CpuSwizzleBlt(CPU_SWIZZLE_BLT_SURFACE *pDest, CPU_SWIZZLE_BLT_SURFAC
|
||||||
#include <intrin.h>
|
#include <intrin.h>
|
||||||
#elif defined(__ARM_ARCH)
|
#elif defined(__ARM_ARCH)
|
||||||
#include <sse2neon.h>
|
#include <sse2neon.h>
|
||||||
|
#elif defined(__riscv)
|
||||||
|
#include "riscv_sse2_support.h"
|
||||||
#elif((defined __clang__) ||(__GNUC__ > 4) || ((__GNUC__ == 4) && (__GNUC_MINOR__ >= 5)))
|
#elif((defined __clang__) ||(__GNUC__ > 4) || ((__GNUC__ == 4) && (__GNUC_MINOR__ >= 5)))
|
||||||
#include <cpuid.h>
|
#include <cpuid.h>
|
||||||
#include <x86intrin.h>
|
#include <x86intrin.h>
|
||||||
|
@ -427,7 +429,10 @@ int SwizzleOffset( // ##########################################################
|
||||||
|
|
||||||
if(PDepSupported == -1)
|
if(PDepSupported == -1)
|
||||||
{
|
{
|
||||||
#if(_MSC_VER >= 1700)
|
#if defined(__riscv)
|
||||||
|
#define PDEP(Src, Mask) 0
|
||||||
|
PDepSupported = 0;
|
||||||
|
#elif(_MSC_VER >= 1700)
|
||||||
#define PDEP(Src, Mask) _pdep_u32((Src), (Mask))
|
#define PDEP(Src, Mask) _pdep_u32((Src), (Mask))
|
||||||
int CpuInfo[4];
|
int CpuInfo[4];
|
||||||
__cpuidex(CpuInfo, 7, 0);
|
__cpuidex(CpuInfo, 7, 0);
|
||||||
|
@ -692,21 +697,34 @@ void CpuSwizzleBlt( // #########################################################
|
||||||
} __m24; // 24-bit/3-byte memory element.
|
} __m24; // 24-bit/3-byte memory element.
|
||||||
|
|
||||||
// Macros intended to compile to various types of "load register from memory" instructions...
|
// Macros intended to compile to various types of "load register from memory" instructions...
|
||||||
|
#if defined(__riscv)
|
||||||
|
// RISC-V variants of the sub-register loads. Each macro copies exactly one
// element of the stated width (1, 2, 3 or 4 bytes) from Src into the low
// bytes of Reg and leaves the remaining bytes of Reg untouched, matching the
// pointer-cast assignments used on the other architectures.
// NOTE(review): memcpy is presumably used here to avoid misaligned or
// type-punned accesses on RISC-V -- confirm with the port author.
#define MOVB_R( Reg, Src) memcpy((uint8_t *)&(Reg), (uint8_t *)(Src), sizeof(uint8_t))
#define MOVW_R( Reg, Src) memcpy((uint16_t *)&(Reg), (uint16_t *)(Src), sizeof(uint16_t))
#define MOV3_R( Reg, Src) memcpy((__m24 *)&(Reg), (__m24 *)(Src), sizeof(__m24))
#define MOVD_R( Reg, Src) memcpy((uint32_t *)&(Reg), (uint32_t *)(Src), sizeof(uint32_t))
|
||||||
|
#else
|
||||||
#define MOVB_R( Reg, Src) (*(uint8_t *)&(Reg) = *(uint8_t *)(Src))
|
#define MOVB_R( Reg, Src) (*(uint8_t *)&(Reg) = *(uint8_t *)(Src))
|
||||||
#define MOVW_R( Reg, Src) (*(uint16_t *)&(Reg) = *(uint16_t *)(Src))
|
#define MOVW_R( Reg, Src) (*(uint16_t *)&(Reg) = *(uint16_t *)(Src))
|
||||||
#define MOV3_R( Reg, Src) (*(__m24 *)&(Reg) = *(__m24 *)(Src))
|
#define MOV3_R( Reg, Src) (*(__m24 *)&(Reg) = *(__m24 *)(Src))
|
||||||
#define MOVD_R( Reg, Src) (*(uint32_t *)&(Reg) = *(uint32_t *)(Src))
|
#define MOVD_R( Reg, Src) (*(uint32_t *)&(Reg) = *(uint32_t *)(Src))
|
||||||
|
#endif
|
||||||
|
|
||||||
#define MOVQ_R( Reg, Src) ((Reg) = _mm_loadl_epi64((__m128i *)(Src)))
|
#define MOVQ_R( Reg, Src) ((Reg) = _mm_loadl_epi64((__m128i *)(Src)))
|
||||||
#define MOVDQ_R( Reg, Src) ((Reg) = _mm_load_si128( (__m128i *)(Src)))
|
#define MOVDQ_R( Reg, Src) ((Reg) = _mm_load_si128( (__m128i *)(Src)))
|
||||||
#define MOVDQU_R(Reg, Src) ((Reg) = _mm_loadu_si128((__m128i *)(Src)))
|
#define MOVDQU_R(Reg, Src) ((Reg) = _mm_loadu_si128((__m128i *)(Src)))
|
||||||
|
|
||||||
// As above, but the other half: "store to memory from register"...
|
// As above, but the other half: "store to memory from register"...
|
||||||
|
#if defined(__riscv)
|
||||||
|
// RISC-V variants of the sub-register stores. Each macro writes exactly one
// element of the stated width (1, 2, 3 or 4 bytes) from the low bytes of Reg
// to Dest; bytes of the destination beyond that width are not modified.
// NOTE(review): memcpy is presumably used here to avoid misaligned or
// type-punned accesses on RISC-V -- confirm with the port author.
#define MOVB_M( Dest, Reg) memcpy((uint8_t *)(Dest), (uint8_t *)&(Reg), sizeof(uint8_t))
#define MOVW_M( Dest, Reg) memcpy((uint16_t *)(Dest), (uint16_t *)&(Reg), sizeof(uint16_t))
#define MOV3_M( Dest, Reg) memcpy((__m24 *)(Dest), (__m24 *)&(Reg), sizeof(__m24))
#define MOVD_M( Dest, Reg) memcpy((uint32_t *)(Dest), (uint32_t *)&(Reg), sizeof(uint32_t))
|
||||||
|
#else
|
||||||
#define MOVB_M( Dest, Reg)(*(uint8_t *)(Dest) = *(uint8_t *)&(Reg))
|
#define MOVB_M( Dest, Reg)(*(uint8_t *)(Dest) = *(uint8_t *)&(Reg))
|
||||||
#define MOVW_M( Dest, Reg)(*(uint16_t *)(Dest) = *(uint16_t *)&(Reg))
|
#define MOVW_M( Dest, Reg)(*(uint16_t *)(Dest) = *(uint16_t *)&(Reg))
|
||||||
#define MOV3_M( Dest, Reg)(*(__m24 *)(Dest) = *(__m24 *)&(Reg))
|
#define MOV3_M( Dest, Reg)(*(__m24 *)(Dest) = *(__m24 *)&(Reg))
|
||||||
#define MOVD_M( Dest, Reg)(*(uint32_t *)(Dest) = *(uint32_t *)&(Reg))
|
#define MOVD_M( Dest, Reg)(*(uint32_t *)(Dest) = *(uint32_t *)&(Reg))
|
||||||
|
#endif
|
||||||
#define MOVQ_M( Dest, Reg)(_mm_storel_epi64((__m128i *)(Dest), (Reg)))
|
#define MOVQ_M( Dest, Reg)(_mm_storel_epi64((__m128i *)(Dest), (Reg)))
|
||||||
#define MOVDQ_M( Dest, Reg)(_mm_store_si128( (__m128i *)(Dest), (Reg)))
|
#define MOVDQ_M( Dest, Reg)(_mm_store_si128( (__m128i *)(Dest), (Reg)))
|
||||||
#define MOVDQU_M( Dest, Reg)(_mm_storeu_si128((__m128i *)(Dest), (Reg)))
|
#define MOVDQU_M( Dest, Reg)(_mm_storeu_si128((__m128i *)(Dest), (Reg)))
|
||||||
|
@ -749,6 +767,9 @@ void CpuSwizzleBlt( // #########################################################
|
||||||
#elif(defined(__ARM_ARCH))
|
#elif(defined(__ARM_ARCH))
|
||||||
#define MOVNTDQA_R(Reg, Src) ((Reg) = (Reg))
|
#define MOVNTDQA_R(Reg, Src) ((Reg) = (Reg))
|
||||||
StreamingLoadSupported = 0;
|
StreamingLoadSupported = 0;
|
||||||
|
#elif(defined(__riscv))
|
||||||
|
#define MOVNTDQA_R(Reg, Src) ((Reg) = _mm_stream_load_si128((__m128i *)(Src)))
|
||||||
|
StreamingLoadSupported = 0;
|
||||||
#elif((defined __clang__) || (__GNUC__ > 4) || (__GNUC__ == 4) && (__GNUC_MINOR__ >= 5))
|
#elif((defined __clang__) || (__GNUC__ > 4) || (__GNUC__ == 4) && (__GNUC_MINOR__ >= 5))
|
||||||
#define MOVNTDQA_R(Reg, Src) ((Reg) = _mm_stream_load_si128((__m128i *)(Src)))
|
#define MOVNTDQA_R(Reg, Src) ((Reg) = _mm_stream_load_si128((__m128i *)(Src)))
|
||||||
unsigned int eax, ebx, ecx, edx;
|
unsigned int eax, ebx, ecx, edx;
|
||||||
|
|
|
@ -0,0 +1,57 @@
|
||||||
|
// Portable stand-ins for the handful of SSE2 intrinsics that CpuSwizzleBlt.c
// uses, for RISC-V builds where no SSE header exists. Every load/store is a
// plain memcpy between memory and a 128-bit GCC/Clang vector value, so no
// alignment is required of the memory operand.
#pragma once
#ifndef __RISCV_SSE_SUPPORT_HPP__
#define __RISCV_SSE_SUPPORT_HPP__
#if defined(__riscv)

#include <stdint.h> // uint16_t, uint64_t
#include <string.h> // memcpy

// 128-bit integer "register": eight 16-bit lanes, 16 bytes in total.
typedef uint16_t __attribute__((vector_size(16))) __m128i;

// Load the 64 bits at mem_addr into the low half of the result; the upper
// half is zero.
static inline __m128i _mm_loadl_epi64(__m128i const* mem_addr) {
    __m128i ret = {0};
    memcpy(&ret, mem_addr, sizeof(uint64_t));
    return ret;
}

// Load 128 bits from mem_addr.
static inline __m128i _mm_load_si128 (__m128i const* mem_addr) {
    __m128i ret;
    memcpy(&ret, mem_addr, sizeof(__m128i));
    return ret;
}

// Load 128 bits from mem_addr (no alignment requirement).
static inline __m128i _mm_loadu_si128 (__m128i const* mem_addr) {
    __m128i ret;
    memcpy(&ret, mem_addr, sizeof(__m128i));
    return ret;
}

// Store the low 64 bits of a to mem_addr.
static inline void _mm_storel_epi64 (__m128i* mem_addr, __m128i a) {
    memcpy(mem_addr, &a, sizeof(uint64_t));
}

// Store all 128 bits of a to mem_addr.
static inline void _mm_store_si128 (__m128i* mem_addr, __m128i a) {
    memcpy(mem_addr, &a, sizeof(__m128i));
}

// Store all 128 bits of a to mem_addr (no alignment requirement).
static inline void _mm_storeu_si128 (__m128i* mem_addr, __m128i a) {
    memcpy(mem_addr, &a, sizeof(__m128i));
}

// "Streaming" store: implemented here as an ordinary 128-bit store.
static inline void _mm_stream_si128 (void* mem_addr, __m128i a) {
    memcpy(mem_addr, &a, sizeof(__m128i));
}

// "Streaming" load: implemented here as an ordinary 128-bit load.
static inline __m128i _mm_stream_load_si128 (void* mem_addr) {
    __m128i ret;
    memcpy(&ret, mem_addr, sizeof(__m128i));
    return ret;
}

// Emit a RISC-V FENCE instruction with the given predecessor/successor sets;
// the "memory" clobber also stops the compiler reordering memory accesses
// across it.
#define RISCV_FENCE(p, s) \
    __asm__ __volatile__ ("fence " #p "," #s : : : "memory")

// Store fence. Implemented as a full read/write fence, which is at least as
// strong as the store-store ordering the x86 instruction provides.
static inline void _mm_sfence(void) {
    RISCV_FENCE(rw,rw);
}

#else
#error "compiling for rv64g (riscv64) but compiler architecture macro undefined"
#endif
#endif
|
Loading…
Reference in New Issue