This commit is contained in:
Christopher Taylor 2025-05-16 13:09:51 -04:00 committed by GitHub
commit c9d50346e5
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 120 additions and 2 deletions

View File

@ -167,6 +167,8 @@ endif()
# Choose the value later passed to the compiler as -march=<GMMLIB_MARCH>.
#   * processors whose name starts with "aarch" -> armv8-a+fp+simd
#   * processors whose name contains "riscv"    -> rv64g
#   * anything else keeps a GMMLIB_MARCH that is already set and only falls
#     back to "corei7" when it is empty.
# CMAKE_SYSTEM_PROCESSOR is quoted so that an empty value yields a valid
# (false) comparison instead of a malformed if() expression, and so that its
# contents are never re-interpreted as a variable name.
if ("${CMAKE_SYSTEM_PROCESSOR}" MATCHES "^aarch")
    set(GMMLIB_MARCH "armv8-a+fp+simd")
elseif ("${CMAKE_SYSTEM_PROCESSOR}" MATCHES "riscv")
    set(GMMLIB_MARCH "rv64g")
elseif ("${GMMLIB_MARCH}" STREQUAL "")
    set(GMMLIB_MARCH "corei7")
endif()
@ -302,6 +304,7 @@ set(SOURCES_
${BS_DIR_GMMLIB}/Texture/GmmTextureSpecialCases.cpp
${BS_DIR_GMMLIB}/Texture/GmmTextureOffset.cpp
${BS_DIR_GMMLIB}/GlobalInfo/GmmInfo.cpp
${BS_DIR_GMMLIB}/Utility/CpuSwizzleBlt/riscv_sse2_support.h
${BS_DIR_GMMLIB}/Utility/CpuSwizzleBlt/CpuSwizzleBlt.c
${BS_DIR_GMMLIB}/Utility/GmmLog/GmmLog.cpp
${BS_DIR_GMMLIB}/Utility/GmmUtility.cpp
@ -579,6 +582,9 @@ if(UNIX)
FILES_MATCHING PATTERN "*.h"
PATTERN "*.hpp")
# Install the RISC-V SSE2 shim header together with CpuSwizzleBlt.c into the
# CpuSwizzleBlt directory of the installed include tree (gmmlib-devel component).
install (FILES
    ${BS_DIR_GMMLIB}/Utility/CpuSwizzleBlt/riscv_sse2_support.h
    ${BS_DIR_GMMLIB}/Utility/CpuSwizzleBlt/CpuSwizzleBlt.c
    DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/igdgmm/GmmLib/Utility/CpuSwizzleBlt/
    COMPONENT gmmlib-devel)

View File

@ -55,6 +55,40 @@ if (${CMAKE_SYSTEM_PROCESSOR} MATCHES "^aarch")
-fPIC
-g
)
elseif (${CMAKE_SYSTEM_PROCESSOR} MATCHES "riscv")
# Compiler flags shared by every build configuration on RISC-V targets.
# The order of the entries is preserved exactly as it is passed to the compiler.
set(GMMLIB_COMPILER_FLAGS_COMMON
    # --- Warnings that are switched on (-Wall itself is not enabled) ---
    -Winit-self
    -Winvalid-pch
    -Wpointer-arith

    # --- Warnings that are silenced ---
    -Wno-unused
    -Wno-unknown-pragmas
    -Wno-comments
    -Wno-narrowing
    -Wno-overflow
    -Wno-parentheses
    -Wno-missing-braces
    -Wno-sign-compare

    # --- Warnings promoted to hard errors ---
    -Werror=address
    -Werror=format-security
    -Werror=return-type

    # --- Code generation / optimization ---
    -march=${GMMLIB_MARCH}
    -finline-functions
    -fno-short-enums
    -Wa,--noexecstack
    -fno-strict-aliasing

    # --- Hardening, section layout, diagnostics, symbol visibility, debug info ---
    -fstack-protector
    -fdata-sections
    -ffunction-sections
    -fmessage-length=0
    -fvisibility=hidden
    -fPIC
    -g
)
else()
SET (GMMLIB_COMPILER_FLAGS_COMMON
#general warnings

View File

@ -375,6 +375,8 @@ extern void CpuSwizzleBlt(CPU_SWIZZLE_BLT_SURFACE *pDest, CPU_SWIZZLE_BLT_SURFAC
#include <intrin.h>
#elif defined(__ARM_ARCH)
#include <sse2neon.h>
#elif defined(__riscv)
#include "riscv_sse2_support.h"
#elif((defined __clang__) ||(__GNUC__ > 4) || ((__GNUC__ == 4) && (__GNUC_MINOR__ >= 5)))
#include <cpuid.h>
#include <x86intrin.h>
@ -427,7 +429,10 @@ int SwizzleOffset( // ##########################################################
if(PDepSupported == -1)
{
#if(_MSC_VER >= 1700)
#if defined(__riscv)
#define PDEP(Src, Mask) 0
PDepSupported = 0;
#elif(_MSC_VER >= 1700)
#define PDEP(Src, Mask) _pdep_u32((Src), (Mask))
int CpuInfo[4];
__cpuidex(CpuInfo, 7, 0);
@ -692,21 +697,34 @@ void CpuSwizzleBlt( // #########################################################
} __m24; // 24-bit/3-byte memory element.
// Macros intended to compile to various types of "load register from memory" instructions...
// Each MOVx_R(Reg, Src) copies one element of the named width from Src into the
// low bytes of Reg and leaves the remaining bytes of Reg untouched:
//   MOVB_R = 1 byte, MOVW_R = 2 bytes, MOV3_R = sizeof(__m24), MOVD_R = 4 bytes.
#if defined(__riscv)
// RISC-V variant: same element widths as the generic variant below, but moved
// with memcpy instead of a typed pointer dereference (presumably to avoid
// misaligned typed accesses -- TODO confirm). The length must be the ELEMENT
// size, not sizeof(__m128i); otherwise the copy reads past the source element
// and overwrites the whole register.
#define MOVB_R( Reg, Src) memcpy((uint8_t *)&(Reg), (uint8_t *)(Src), sizeof(uint8_t))
#define MOVW_R( Reg, Src) memcpy((uint16_t *)&(Reg), (uint16_t *)(Src), sizeof(uint16_t))
#define MOV3_R( Reg, Src) memcpy((__m24 *)&(Reg), (__m24 *)(Src), sizeof(__m24))
#define MOVD_R( Reg, Src) memcpy((uint32_t *)&(Reg), (uint32_t *)(Src), sizeof(uint32_t))
#else
#define MOVB_R( Reg, Src) (*(uint8_t *)&(Reg) = *(uint8_t *)(Src))
#define MOVW_R( Reg, Src) (*(uint16_t *)&(Reg) = *(uint16_t *)(Src))
#define MOV3_R( Reg, Src) (*(__m24 *)&(Reg) = *(__m24 *)(Src))
#define MOVD_R( Reg, Src) (*(uint32_t *)&(Reg) = *(uint32_t *)(Src))
#endif
// 64-bit and 128-bit loads go through the SSE2-style intrinsics on every platform.
#define MOVQ_R( Reg, Src) ((Reg) = _mm_loadl_epi64((__m128i *)(Src)))
#define MOVDQ_R( Reg, Src) ((Reg) = _mm_load_si128( (__m128i *)(Src)))
#define MOVDQU_R(Reg, Src) ((Reg) = _mm_loadu_si128((__m128i *)(Src)))
// As above, but the other half: "store to memory from register"...
// Each MOVx_M(Dest, Reg) writes one element of the named width from the low
// bytes of Reg to Dest and must not touch any byte beyond that element:
//   MOVB_M = 1 byte, MOVW_M = 2 bytes, MOV3_M = sizeof(__m24), MOVD_M = 4 bytes.
#if defined(__riscv)
// RISC-V variant: memcpy-based stores. The length is the ELEMENT size; using
// sizeof(__m128i) here would write past the destination element and clobber
// neighbouring memory.
#define MOVB_M( Dest, Reg) memcpy((uint8_t *)(Dest), (uint8_t *)&(Reg), sizeof(uint8_t))
#define MOVW_M( Dest, Reg) memcpy((uint16_t *)(Dest), (uint16_t *)&(Reg), sizeof(uint16_t))
#define MOV3_M( Dest, Reg) memcpy((__m24 *)(Dest), (__m24 *)&(Reg), sizeof(__m24))
#define MOVD_M( Dest, Reg) memcpy((uint32_t *)(Dest), (uint32_t *)&(Reg), sizeof(uint32_t))
#else
#define MOVB_M( Dest, Reg)(*(uint8_t *)(Dest) = *(uint8_t *)&(Reg))
#define MOVW_M( Dest, Reg)(*(uint16_t *)(Dest) = *(uint16_t *)&(Reg))
#define MOV3_M( Dest, Reg)(*(__m24 *)(Dest) = *(__m24 *)&(Reg))
#define MOVD_M( Dest, Reg)(*(uint32_t *)(Dest) = *(uint32_t *)&(Reg))
#endif
// 64-bit and 128-bit stores go through the SSE2-style intrinsics on every platform.
#define MOVQ_M( Dest, Reg)(_mm_storel_epi64((__m128i *)(Dest), (Reg)))
#define MOVDQ_M( Dest, Reg)(_mm_store_si128( (__m128i *)(Dest), (Reg)))
#define MOVDQU_M( Dest, Reg)(_mm_storeu_si128((__m128i *)(Dest), (Reg)))
@ -749,6 +767,9 @@ void CpuSwizzleBlt( // #########################################################
#elif(defined(__ARM_ARCH))
#define MOVNTDQA_R(Reg, Src) ((Reg) = (Reg))
StreamingLoadSupported = 0;
#elif(defined(__riscv))
#define MOVNTDQA_R(Reg, Src) ((Reg) = _mm_stream_load_si128((__m128i *)(Src)))
StreamingLoadSupported = 0;
#elif((defined __clang__) || (__GNUC__ > 4) || (__GNUC__ == 4) && (__GNUC_MINOR__ >= 5))
#define MOVNTDQA_R(Reg, Src) ((Reg) = _mm_stream_load_si128((__m128i *)(Src)))
unsigned int eax, ebx, ecx, edx;

View File

@ -0,0 +1,57 @@
#pragma once
#ifndef __RISCV_SSE_SUPPORT_HPP__
#define __RISCV_SSE_SUPPORT_HPP__

/*
 * Portable stand-ins for the handful of SSE2-style intrinsics referenced by
 * the CPU swizzle-blit code, for use when building for RISC-V.
 *
 * Every load/store is implemented with memcpy on the POINTED-TO memory, so
 * none of them imposes an alignment requirement on mem_addr. All helpers are
 * `static inline` so that including this header from more than one
 * translation unit does not produce duplicate symbol definitions.
 */

#if defined(__riscv)

#include <stdint.h>
#include <string.h>

/* 128-bit integer vector: eight 16-bit lanes, 16 bytes in total. */
typedef uint16_t __attribute__((vector_size(16))) __m128i;

/* Load the 64 bits at mem_addr into the low half of the result; the upper
 * half is zeroed. */
static inline __m128i _mm_loadl_epi64(__m128i const* mem_addr) {
    __m128i ret;
    memset(&ret, 0, sizeof(ret));
    memcpy(&ret, mem_addr, sizeof(uint64_t));
    return ret;
}

/* Load 128 bits from mem_addr. */
static inline __m128i _mm_load_si128 (__m128i const* mem_addr) {
    __m128i ret;
    memcpy(&ret, mem_addr, sizeof(__m128i));
    return ret;
}

/* Load 128 bits from mem_addr (no alignment assumed). */
static inline __m128i _mm_loadu_si128 (__m128i const* mem_addr) {
    __m128i ret;
    memcpy(&ret, mem_addr, sizeof(__m128i));
    return ret;
}

/* Store the low 64 bits of a to mem_addr; only 8 bytes are written. */
static inline void _mm_storel_epi64 (__m128i* mem_addr, __m128i a) {
    memcpy(mem_addr, &a, sizeof(uint64_t));
}

/* Store all 128 bits of a to mem_addr. */
static inline void _mm_store_si128 (__m128i* mem_addr, __m128i a) {
    memcpy(mem_addr, &a, sizeof(__m128i));
}

/* Store all 128 bits of a to mem_addr (no alignment assumed). */
static inline void _mm_storeu_si128 (__m128i* mem_addr, __m128i a) {
    memcpy(mem_addr, &a, sizeof(__m128i));
}

/* "Streaming" store: implemented here as an ordinary 128-bit store. */
static inline void _mm_stream_si128 (void* mem_addr, __m128i a) {
    memcpy(mem_addr, &a, sizeof(__m128i));
}

/* "Streaming" load: implemented here as an ordinary 128-bit load. */
static inline __m128i _mm_stream_load_si128 (void* mem_addr) {
    __m128i ret;
    memcpy(&ret, mem_addr, sizeof(__m128i));
    return ret;
}

/* Emit a RISC-V `fence p,s` instruction; the "memory" clobber also stops the
 * compiler from moving memory accesses across it. */
#define RISCV_FENCE(p, s) \
    __asm__ __volatile__ ("fence " #p "," #s : : : "memory")

/* Store fence, implemented as a full read/write fence. */
static inline void _mm_sfence(void) {
    RISCV_FENCE(rw,rw);
}

#else
#error "compiling for rv64g (riscv64) but compiler architecture macro undefined"
#endif
#endif