Add riscv64 build support to GmmLib.

Selects -march=rv64g for riscv64 targets, adds a RISC-V compiler-flag block to
Linux.cmake, and introduces riscv_sse2_support.h, a memcpy-based replacement for
the SSE2 load/store intrinsics that CpuSwizzleBlt.c uses.

diff --git a/Source/GmmLib/CMakeLists.txt b/Source/GmmLib/CMakeLists.txt
index 48d054c..efc3618 100644
--- a/Source/GmmLib/CMakeLists.txt
+++ b/Source/GmmLib/CMakeLists.txt
@@ -167,6 +167,8 @@ endif()
 
 if (${CMAKE_SYSTEM_PROCESSOR} MATCHES "^aarch")
     set(GMMLIB_MARCH "armv8-a+fp+simd")
+elseif ("${CMAKE_SYSTEM_PROCESSOR}" MATCHES "^riscv64")  # rv64g below is a 64-bit-only ISA string
+    set(GMMLIB_MARCH "rv64g")
 elseif("${GMMLIB_MARCH}" STREQUAL "")
     set(GMMLIB_MARCH "corei7")
 endif()
@@ -302,6 +304,7 @@ set(SOURCES_
     ${BS_DIR_GMMLIB}/Texture/GmmTextureSpecialCases.cpp
     ${BS_DIR_GMMLIB}/Texture/GmmTextureOffset.cpp
     ${BS_DIR_GMMLIB}/GlobalInfo/GmmInfo.cpp
+    ${BS_DIR_GMMLIB}/Utility/CpuSwizzleBlt/riscv_sse2_support.h
     ${BS_DIR_GMMLIB}/Utility/CpuSwizzleBlt/CpuSwizzleBlt.c
     ${BS_DIR_GMMLIB}/Utility/GmmLog/GmmLog.cpp
     ${BS_DIR_GMMLIB}/Utility/GmmUtility.cpp
@@ -579,6 +582,9 @@ if(UNIX)
         FILES_MATCHING PATTERN "*.h"
         PATTERN "*.hpp")
 
+    install (FILES ${BS_DIR_GMMLIB}/Utility/CpuSwizzleBlt/riscv_sse2_support.h
+        DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/igdgmm/GmmLib/Utility/CpuSwizzleBlt/ COMPONENT gmmlib-devel)
+
     install (FILES ${BS_DIR_GMMLIB}/Utility/CpuSwizzleBlt/CpuSwizzleBlt.c
         DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/igdgmm/GmmLib/Utility/CpuSwizzleBlt/ COMPONENT gmmlib-devel)
 
diff --git a/Source/GmmLib/Linux.cmake b/Source/GmmLib/Linux.cmake
index 87b74d8..ed49867 100644
--- a/Source/GmmLib/Linux.cmake
+++ b/Source/GmmLib/Linux.cmake
@@ -55,6 +55,40 @@ if (${CMAKE_SYSTEM_PROCESSOR} MATCHES "^aarch")
         -fPIC
         -g
         )
+elseif ("${CMAKE_SYSTEM_PROCESSOR}" MATCHES "^riscv64")
+    SET (GMMLIB_COMPILER_FLAGS_COMMON
+        #general warnings
+        #-Wall
+        -Winit-self
+        -Winvalid-pch
+        -Wpointer-arith
+        -Wno-unused
+        -Wno-unknown-pragmas
+        -Wno-comments
+        -Wno-narrowing
+        -Wno-overflow
+        -Wno-parentheses
+        -Wno-missing-braces
+        -Wno-sign-compare
+        -Werror=address
+        -Werror=format-security
+        -Werror=return-type
+
+        # General optimization options
+        -march=${GMMLIB_MARCH}  # GMMLIB_MARCH is set to rv64g for riscv64 in CMakeLists.txt
+        -finline-functions
+        -fno-short-enums
+        -Wa,--noexecstack
+        -fno-strict-aliasing
+        # Other common flags
+        -fstack-protector
+        -fdata-sections
+        -ffunction-sections
+        -fmessage-length=0
+        -fvisibility=hidden
+        -fPIC
+        -g
+        )
 else()
     SET (GMMLIB_COMPILER_FLAGS_COMMON
         #general warnings
diff --git a/Source/GmmLib/Utility/CpuSwizzleBlt/CpuSwizzleBlt.c b/Source/GmmLib/Utility/CpuSwizzleBlt/CpuSwizzleBlt.c
index e090fd6..e54207d 100644
--- a/Source/GmmLib/Utility/CpuSwizzleBlt/CpuSwizzleBlt.c
+++ b/Source/GmmLib/Utility/CpuSwizzleBlt/CpuSwizzleBlt.c
@@ -375,6 +375,8 @@ extern void CpuSwizzleBlt(CPU_SWIZZLE_BLT_SURFACE *pDest, CPU_SWIZZLE_BLT_SURFAC
     #include
 #elif defined(__ARM_ARCH)
     #include
+#elif defined(__riscv)
+    #include "riscv_sse2_support.h"
 #elif((defined __clang__) ||(__GNUC__ > 4) || ((__GNUC__ == 4) && (__GNUC_MINOR__ >= 5)))
     #include
     #include
@@ -427,7 +429,10 @@ int SwizzleOffset( // ##########################################################
 
     if(PDepSupported == -1)
     {
-        #if(_MSC_VER >= 1700)
+        #if defined(__riscv)
+            #define PDEP(Src, Mask) 0
+            PDepSupported = 0; // PDEP() above is only a placeholder; the PDEP path is marked unsupported
+        #elif(_MSC_VER >= 1700)
             #define PDEP(Src, Mask) _pdep_u32((Src), (Mask))
             int CpuInfo[4];
             __cpuidex(CpuInfo, 7, 0);
@@ -692,21 +697,34 @@ void CpuSwizzleBlt( // #########################################################
         } __m24; // 24-bit/3-byte memory element.
 
         // Macros intended to compile to various types of "load register from memory" instructions...
+#if defined(__riscv) // memcpy variants: each copies exactly the element size (1, 2, 3 or 4 bytes)
+        #define MOVB_R( Reg, Src) memcpy((uint8_t *)&(Reg), (uint8_t *)(Src), sizeof(uint8_t))
+        #define MOVW_R( Reg, Src) memcpy((uint16_t *)&(Reg), (uint16_t *)(Src), sizeof(uint16_t))
+        #define MOV3_R( Reg, Src) memcpy((__m24 *)&(Reg), (__m24 *)(Src), sizeof(__m24))
+        #define MOVD_R( Reg, Src) memcpy((uint32_t *)&(Reg), (uint32_t *)(Src), sizeof(uint32_t))
+#else
         #define MOVB_R( Reg, Src) (*(uint8_t *)&(Reg) = *(uint8_t *)(Src))
         #define MOVW_R( Reg, Src) (*(uint16_t *)&(Reg) = *(uint16_t *)(Src))
         #define MOV3_R( Reg, Src) (*(__m24 *)&(Reg) = *(__m24 *)(Src))
         #define MOVD_R( Reg, Src) (*(uint32_t *)&(Reg) = *(uint32_t *)(Src))
+#endif
 
         #define MOVQ_R( Reg, Src) ((Reg) = _mm_loadl_epi64((__m128i *)(Src)))
         #define MOVDQ_R( Reg, Src) ((Reg) = _mm_load_si128( (__m128i *)(Src)))
         #define MOVDQU_R(Reg, Src) ((Reg) = _mm_loadu_si128((__m128i *)(Src)))
 
         // As above, but the other half: "store to memory from register"...
+#if defined(__riscv) // memcpy variants: each copies exactly the element size (1, 2, 3 or 4 bytes)
+        #define MOVB_M( Dest, Reg) memcpy((uint8_t *)(Dest), (uint8_t *)&(Reg), sizeof(uint8_t))
+        #define MOVW_M( Dest, Reg) memcpy((uint16_t *)(Dest), (uint16_t *)&(Reg), sizeof(uint16_t))
+        #define MOV3_M( Dest, Reg) memcpy((__m24 *)(Dest), (__m24 *)&(Reg), sizeof(__m24))
+        #define MOVD_M( Dest, Reg) memcpy((uint32_t *)(Dest), (uint32_t *)&(Reg), sizeof(uint32_t))
+#else
         #define MOVB_M( Dest, Reg)(*(uint8_t *)(Dest) = *(uint8_t *)&(Reg))
         #define MOVW_M( Dest, Reg)(*(uint16_t *)(Dest) = *(uint16_t *)&(Reg))
         #define MOV3_M( Dest, Reg)(*(__m24 *)(Dest) = *(__m24 *)&(Reg))
         #define MOVD_M( Dest, Reg)(*(uint32_t *)(Dest) = *(uint32_t *)&(Reg))
-
+#endif
         #define MOVQ_M( Dest, Reg)(_mm_storel_epi64((__m128i *)(Dest), (Reg)))
         #define MOVDQ_M( Dest, Reg)(_mm_store_si128( (__m128i *)(Dest), (Reg)))
         #define MOVDQU_M( Dest, Reg)(_mm_storeu_si128((__m128i *)(Dest), (Reg)))
@@ -749,6 +767,9 @@ void CpuSwizzleBlt( // #########################################################
             #elif(defined(__ARM_ARCH))
                 #define MOVNTDQA_R(Reg, Src) ((Reg) = (Reg))
                 StreamingLoadSupported = 0;
+            #elif(defined(__riscv))
+                #define MOVNTDQA_R(Reg, Src) ((Reg) = _mm_stream_load_si128((__m128i *)(Src)))
+                StreamingLoadSupported = 0;
             #elif((defined __clang__) || (__GNUC__ > 4) || (__GNUC__ == 4) && (__GNUC_MINOR__ >= 5))
                 #define MOVNTDQA_R(Reg, Src) ((Reg) = _mm_stream_load_si128((__m128i *)(Src)))
                 unsigned int eax, ebx, ecx, edx;
diff --git a/Source/GmmLib/Utility/CpuSwizzleBlt/riscv_sse2_support.h b/Source/GmmLib/Utility/CpuSwizzleBlt/riscv_sse2_support.h
new file mode 100644
index 0000000..26dfa3c
--- /dev/null
+++ b/Source/GmmLib/Utility/CpuSwizzleBlt/riscv_sse2_support.h
@@ -0,0 +1,76 @@
+#pragma once
+#ifndef RISCV_SSE2_SUPPORT_H
+#define RISCV_SSE2_SUPPORT_H
+#if defined(__riscv)
+
+// memcpy-based stand-ins for the SSE2 load/store intrinsics used by
+// CpuSwizzleBlt.c, for RISC-V builds that include this header instead of an
+// intrinsics header. All helpers are static inline so the header can be included
+// from more than one translation unit without duplicate-symbol link errors.
+
+#include <stdint.h>
+#include <string.h>
+
+    // 128-bit integer vector (eight 16-bit lanes = 16 bytes) standing in for SSE2's __m128i.
+    typedef uint16_t __attribute__((vector_size(16))) __m128i;
+
+    // Loads 8 bytes from mem_addr into the first 8 bytes of the result; the remaining bytes are zero.
+    static inline __m128i _mm_loadl_epi64(__m128i const* mem_addr) {
+        __m128i ret = {0};
+        memcpy(&ret, mem_addr, sizeof(uint64_t));
+        return ret;
+    }
+
+    // Loads 16 bytes from mem_addr.
+    static inline __m128i _mm_load_si128(__m128i const* mem_addr) {
+        __m128i ret;
+        memcpy(&ret, mem_addr, sizeof(__m128i));
+        return ret;
+    }
+
+    // Loads 16 bytes from mem_addr; identical to _mm_load_si128 in this implementation.
+    static inline __m128i _mm_loadu_si128(__m128i const* mem_addr) {
+        __m128i ret;
+        memcpy(&ret, mem_addr, sizeof(__m128i));
+        return ret;
+    }
+
+    // Stores the first 8 bytes of a to mem_addr.
+    static inline void _mm_storel_epi64(__m128i* mem_addr, __m128i a) {
+        memcpy(mem_addr, &a, sizeof(uint64_t));
+    }
+
+    // Stores all 16 bytes of a to mem_addr.
+    static inline void _mm_store_si128(__m128i* mem_addr, __m128i a) {
+        memcpy(mem_addr, &a, sizeof(__m128i));
+    }
+
+    // Stores all 16 bytes of a to mem_addr; identical to _mm_store_si128 in this implementation.
+    static inline void _mm_storeu_si128(__m128i* mem_addr, __m128i a) {
+        memcpy(mem_addr, &a, sizeof(__m128i));
+    }
+
+    // Streaming store; implemented here as an ordinary 16-byte store.
+    static inline void _mm_stream_si128(void* mem_addr, __m128i a) {
+        memcpy(mem_addr, &a, sizeof(__m128i));
+    }
+
+    // Streaming load; implemented here as an ordinary 16-byte load.
+    static inline __m128i _mm_stream_load_si128(void* mem_addr) {
+        __m128i ret;
+        memcpy(&ret, mem_addr, sizeof(__m128i));
+        return ret;
+    }
+
+    // Emits a RISC-V "fence p,s" instruction; the "memory" clobber also makes it a compiler barrier.
+    #define RISCV_FENCE(p, s) \
+        __asm__ __volatile__ ("fence " #p "," #s : : : "memory")
+
+    // Store-fence replacement; issues a full "fence rw,rw".
+    static inline void _mm_sfence(void) {
+        RISCV_FENCE(rw,rw);
+    }
+#else
+#error "compiling for rv64g (riscv64) but compiler architecture macro undefined"
+#endif
+#endif